npm - @aws/ml-container-creator - Versions diffs - 0.13.5 → 0.15.0 - Mend

@aws/ml-container-creator 0.13.5 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35)

package/config/parameter-schema-v2.json +32 -4
package/infra/ci-harness/lib/ci-harness-stack.ts +13 -5
package/infra/ci-harness/package-lock.json +121 -111
package/infra/ci-harness/package.json +1 -1
package/package.json +2 -2
package/servers/instance-sizer/index.js +72 -4
package/servers/instance-sizer/lib/model-resolver.js +28 -2
package/src/app.js +15 -0
package/src/lib/config-loader.js +18 -0
package/src/lib/config-manager.js +6 -1
package/src/lib/dataset-slug.js +152 -0
package/src/lib/generated/cli-options.js +9 -3
package/src/lib/generated/parameter-matrix.js +14 -3
package/src/lib/generated/validation-rules.js +1 -1
package/src/lib/mcp-query-runner.js +6 -0
package/src/lib/prompt-runner.js +5 -0
package/src/lib/prompts/feature-prompts.js +1 -1
package/src/lib/template-manager.js +0 -7
package/src/lib/template-variable-resolver.js +51 -1
package/src/lib/tune-config-state.js +14 -1
package/templates/do/.benchmark_writer.py +9 -0
package/templates/do/.register_helper.py +1163 -0
package/templates/do/.tune_helper.py +168 -2
package/templates/do/__pycache__/.adapter_helper.cpython-312.pyc +0 -0
package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
package/templates/do/__pycache__/.register_helper.cpython-312.pyc +0 -0
package/templates/do/__pycache__/.tune_helper.cpython-312.pyc +0 -0
package/templates/do/adapter +319 -27
package/templates/do/add-ic +85 -3
package/templates/do/benchmark +28 -8
package/templates/do/config +20 -0
package/templates/do/lib/inference-component.sh +56 -3
package/templates/do/register +552 -6
package/templates/do/test +12 -2
package/templates/do/tune +201 -6

package/servers/instance-sizer/index.js CHANGED Viewed

@@ -25,8 +25,8 @@ import { readFileSync } from 'node:fs';
 import { fileURLToPath } from 'node:url';
 import { resolve, dirname } from 'node:path';
 import { resolveModelMetadata } from './lib/model-resolver.js';
-import { estimateVram } from './lib/vram-estimator.js';
-import { filterAndRankInstances, applyAvailabilityRanking } from './lib/instance-ranker.js';
+import { estimateVram, computeMaxModelLen } from './lib/vram-estimator.js';
+import { filterAndRankInstances, applyAvailabilityRanking, getPerGpuMemoryGb } from './lib/instance-ranker.js';
 import { QuotaResolver } from './lib/quota-resolver.js';
 import { queryBedrock } from '../lib/bedrock-client.js';
@@ -393,6 +393,66 @@ async function handleGetInstanceRecommendation(params) {
         { limit }
     );
+    // Step 3-max_model_len: When no instance fits at full context, try capping context length
+    // NFR-1 guard: skip this logic for models with recommendedInstances in catalog
+    let suggestedMaxModelLen = null;
+    let contextLengthCapped = false;
+    let originalMaxPositionEmbeddings = null;
+    if (recommendations.length === 0 && !modelMetadata.recommendedInstances && modelMetadata.maxPositionEmbeddings) {
+        // Find the largest available GPU instance
+        const gpuInstances = Object.entries(effectiveCatalog)
+            .filter(([, meta]) => meta.category === 'gpu' && meta.gpus > 0)
+            .map(([name, meta]) => {
+                const perGpu = getPerGpuMemoryGb(meta);
+                return { name, meta, totalVramGb: perGpu ? perGpu * meta.gpus : 0 };
+            })
+            .filter(i => i.totalVramGb > 0)
+            .sort((a, b) => b.totalVramGb - a.totalVramGb);
+        if (gpuInstances.length > 0) {
+            const bestInstance = gpuInstances[0];
+            // Compute model weight memory for computeMaxModelLen
+            const weightsGb = vramEstimate.breakdown.weightsGb;
+            const safeLen = computeMaxModelLen({
+                modelWeightGb: weightsGb,
+                totalGpuMemoryGb: bestInstance.meta.gpuMemoryGb || (bestInstance.totalVramGb / bestInstance.meta.gpus),
+                gpuCount: bestInstance.meta.gpus,
+                numLayers: modelMetadata.numLayers,
+                numKvHeads: modelMetadata.numKvHeads,
+                headDim: modelMetadata.headDim
+            });
+            if (safeLen && safeLen.maxModelLen >= 2048) {
+                // Re-estimate VRAM with capped sequence length
+                const cappedEstimate = estimateVram({
+                    parameterCount: modelMetadata.parameterCount,
+                    dtype: modelMetadata.dtype,
+                    quantization: quantization || undefined,
+                    maxSequenceLength: safeLen.maxModelLen,
+                    batchSize: effectiveBatchSize || undefined
+                });
+                // Re-filter instances with the reduced VRAM requirement
+                recommendations = filterAndRankInstances(
+                    cappedEstimate.vramGb,
+                    effectiveCatalog,
+                    { limit }
+                );
+                suggestedMaxModelLen = safeLen.maxModelLen;
+                contextLengthCapped = true;
+                originalMaxPositionEmbeddings = modelMetadata.maxPositionEmbeddings;
+                log(`Context capped: ${modelMetadata.maxPositionEmbeddings} → ${safeLen.maxModelLen} for ${modelName}`);
+            } else {
+                // AC-1.6: safeLen < 2048 or null — recommend larger instance instead
+                log(`Model ${modelName} cannot fit 2048 context on ${bestInstance.name}, recommending larger instance`);
+            }
+        }
+    }
     // Step 3a: Quota & availability filtering (discover mode only)
     let preQuotaFilterCount = 0;
     let allFilteredByQuota = false;
@@ -521,7 +581,10 @@ async function handleGetInstanceRecommendation(params) {
         content: [{
             type: 'text',
             text: JSON.stringify({
-                values: { instanceType: topRecommendation },
+                values: {
+                    instanceType: topRecommendation,
+                    ...(suggestedMaxModelLen ? { maxModelLen: suggestedMaxModelLen } : {})
+                },
                 choices: { instanceType: rankedList },
                 metadata: {
                     modelName,
@@ -533,7 +596,12 @@ async function handleGetInstanceRecommendation(params) {
                     recommendations: finalRecommendations,
                     source: modelMetadata.source,
                     smartModeUsed,
-                    allFilteredByQuota
+                    allFilteredByQuota,
+                    ...(contextLengthCapped ? {
+                        suggestedMaxModelLen,
+                        contextLengthCapped: true,
+                        originalMaxPositionEmbeddings
+                    } : {})
                 }
             })
         }]

package/servers/instance-sizer/lib/model-resolver.js CHANGED Viewed

@@ -142,13 +142,27 @@ export function extractFromHuggingFaceConfig(config) {
     const architecture = (config.architectures && config.architectures[0]) || 'unknown';
     const maxPositionEmbeddings = config.max_position_embeddings || 4096;
-    return {
+    // Extract architecture params for KV cache computation (computeMaxModelLen)
+    const numLayers = config.num_hidden_layers || null;
+    const numKvHeads = config.num_key_value_heads || config.num_attention_heads || null;
+    const headDim = config.head_dim || (config.hidden_size && config.num_attention_heads
+        ? Math.floor(config.hidden_size / config.num_attention_heads)
+        : null);
+    const result = {
         parameterCount,
         dtype,
         architecture,
         maxPositionEmbeddings,
         source: 'huggingface_api'
     };
+    // Only include architecture params if available (graceful degradation)
+    if (numLayers) result.numLayers = numLayers;
+    if (numKvHeads) result.numKvHeads = numKvHeads;
+    if (headDim) result.headDim = headDim;
+    return result;
 }
 /**
@@ -175,13 +189,25 @@ export async function resolveModelMetadata(modelName, options = {}) {
     const catalogEntry = catalogLookup(modelName, catalog);
     if (catalogEntry) {
-        return {
+        const result = {
             parameterCount: catalogEntry.parameterCount,
             dtype: catalogEntry.defaultDtype || 'float16',
             architecture: catalogEntry.architecture || 'unknown',
             maxPositionEmbeddings: catalogEntry.maxPositionEmbeddings || 4096,
             source: 'catalog'
         };
+        // Pass through recommendedInstances for NFR-1 guard
+        if (catalogEntry.recommendedInstances) {
+            result.recommendedInstances = catalogEntry.recommendedInstances;
+        }
+        // Pass through architecture params if available in catalog
+        if (catalogEntry.numLayers) result.numLayers = catalogEntry.numLayers;
+        if (catalogEntry.numKvHeads) result.numKvHeads = catalogEntry.numKvHeads;
+        if (catalogEntry.headDim) result.headDim = catalogEntry.headDim;
+        return result;
     }
     // Step 2: If discover mode, try HuggingFace Hub

package/src/app.js CHANGED Viewed

@@ -402,6 +402,7 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
         ignorePatterns.push('**/do/.tune_helper.py');
         ignorePatterns.push('**/do/.stage_helper.py');
         ignorePatterns.push('**/do/.adapter_helper.py');
+        ignorePatterns.push('**/do/.register_helper.py');
         ignorePatterns.push('**/do/train');
         ignorePatterns.push('**/do/.train_build_request.py');
         ignorePatterns.push('**/do/.train_status_parser.py');
@@ -578,6 +579,20 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
             fs.writeFileSync(gitignorePath, mlccIgnore);
         }
     }
+    // Add __pycache__/ and *.pyc to .gitignore (Python helpers leave bytecode behind)
+    {
+        const gitignorePath = path.join(destDir, '.gitignore');
+        const pycacheIgnore = '# Python bytecode (generated by do/ helper scripts)\n__pycache__/\n*.pyc\n';
+        if (fs.existsSync(gitignorePath)) {
+            const existing = fs.readFileSync(gitignorePath, 'utf8');
+            if (!existing.includes('__pycache__')) {
+                fs.appendFileSync(gitignorePath, `\n${pycacheIgnore}`);
+            }
+        } else {
+            fs.writeFileSync(gitignorePath, pycacheIgnore);
+        }
+    }
 }
 /**

package/src/lib/config-loader.js CHANGED Viewed

@@ -265,6 +265,21 @@ export default class ConfigLoader {
                 return;
             }
+            // Handle icEnvVars object (deploy-time IC environment variables)
+            if (key === 'icEnvVars' && typeof value === 'object' && value !== null) {
+                if (!this.manager.config.icEnvVars) {
+                    this.manager.config.icEnvVars = {};
+                }
+                const cliIcEnvVars = (this.manager.explicitConfig && this.manager.explicitConfig.icEnvVars) || {};
+                Object.entries(value).forEach(([envKey, envValue]) => {
+                    if (!(envKey in cliIcEnvVars)) {
+                        this.manager.config.icEnvVars[envKey] = envValue;
+                        this.manager._recordSource(`icEnvVars.${envKey}`, envValue, 'config-file');
+                    }
+                });
+                return;
+            }
             if (this.manager._isSourceSupported(key, 'configFile')) {
                 filteredConfig[key] = this.manager._parseValue(key, value);
                 this.manager._recordSource(key, this.manager._parseValue(key, value), 'config-file');
@@ -342,6 +357,9 @@ export default class ConfigLoader {
         // Parse --server-env KEY=VALUE pairs
         this._parseEnvVarOptions('server-env', 'serverEnvVars');
+        // Parse --ic-env KEY=VALUE pairs (deploy-time IC environment variables)
+        this._parseEnvVarOptions('ic-env', 'icEnvVars');
     }
     /**

package/src/lib/config-manager.js CHANGED Viewed

@@ -183,6 +183,9 @@ export default class ConfigManager {
         if (this.config.serverEnvVars && typeof this.config.serverEnvVars === 'object') {
             finalConfig.serverEnvVars = { ...this.config.serverEnvVars };
         }
+        if (this.config.icEnvVars && typeof this.config.icEnvVars === 'object') {
+            finalConfig.icEnvVars = { ...this.config.icEnvVars };
+        }
         // Ensure all parameters from the matrix are included in final config
         // This is important for optional parameters that might be null
@@ -411,7 +414,8 @@ export default class ConfigManager {
             ...endpointParams,
             ...icParams,
             'modelEnvVars',
-            'serverEnvVars'
+            'serverEnvVars',
+            'icEnvVars'
         ]);
         const core = {};
         for (const [key, value] of Object.entries(this.config)) {
@@ -426,6 +430,7 @@ export default class ConfigManager {
             icConfig,
             modelEnvVars: { ...(this.config.modelEnvVars || {}) },
             serverEnvVars: { ...(this.config.serverEnvVars || {}) },
+            icEnvVars: { ...(this.config.icEnvVars || {}) },
             manifest: [...this._sourceManifest]
         };
     }

package/src/lib/dataset-slug.js ADDED Viewed

@@ -0,0 +1,152 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Dataset Slug Derivation
+ *
+ * Derives a deterministic, short slug from a dataset URI for use in
+ * tuning-job-aware adapter naming conventions.
+ *
+ * Slugification rules (applied in this order):
+ * - Lowercase
+ * - Strip non-alphanumeric characters (keep hyphens)
+ * - Replace consecutive hyphens with single hyphen
+ * - Strip leading/trailing hyphens
+ * - Truncate to 20 characters, then strip any trailing hyphen left by the cut
+ *
+ * Examples:
+ *   hf://org/name           -> "name"
+ *   hf://tatsu-lab/alpaca   -> "alpaca"
+ *   hf://Open-Orca/OpenOrca -> "openorca"
+ *   s3://bucket/path/file.jsonl -> "file"
+ *
+ * Requirements: US-4 (AC-4.2)
+ */
+/**
+ * Derive a dataset slug from a dataset URI.
+ *
+ * A raw name is picked according to the URI prefix and then passed through
+ * slugify():
+ * - "hf://" prefix: everything from the first "?" onward is dropped, the rest
+ *   is split on "/", and the second component is used. When there is no second
+ *   component the first one is used instead.
+ * - "s3://" prefix: the last "/"-separated component is used, with everything
+ *   from its last "." onward removed. A dot in first position is not treated
+ *   as an extension separator, so ".hidden" is kept whole.
+ * - anything else: the last "/"-separated component is used as-is; no
+ *   extension or query-string stripping happens before slugify().
+ *
+ * NOTE(review): in the "s3://" and fallback branches a URI that ends in "/"
+ * has an empty last component, so the result is ''. A key-less URI such as
+ * "s3://bucket" yields the bucket name as the slug. Confirm both are intended.
+ *
+ * @param {string} datasetUri - Dataset URI (s3://... or hf://...)
+ * @returns {string} The derived slug, or empty string if the input is falsy,
+ *   is not a string, or no name could be extracted
+ */
+export function deriveDatasetSlug(datasetUri) {
+    if (!datasetUri || typeof datasetUri !== 'string') {
+        return '';
+    }
+    let rawName = '';
+    if (datasetUri.startsWith('hf://')) {
+        // hf://org/name[/split][?file=pattern]
+        // Extract the dataset name (second path component)
+        const hfPath = datasetUri.slice(5); // remove "hf://"
+        const withoutQuery = hfPath.split('?')[0]; // remove ?file=...
+        const parts = withoutQuery.split('/');
+        // parts[0] = org, parts[1] = name, parts[2+] = split; fall back to parts[0] when no name is present
+        rawName = parts[1] || parts[0] || '';
+    } else if (datasetUri.startsWith('s3://')) {
+        // s3://bucket/path/file.jsonl -> slug from filename (without extension)
+        const s3Path = datasetUri.slice(5); // remove "s3://"
+        const parts = s3Path.split('/');
+        const filename = parts[parts.length - 1] || '';
+        // Remove file extension (dotIndex > 0 leaves names that start with a dot untouched)
+        const dotIndex = filename.lastIndexOf('.');
+        rawName = dotIndex > 0 ? filename.substring(0, dotIndex) : filename;
+    } else {
+        // Unknown format — try to extract last path component (extension is not removed here)
+        const parts = datasetUri.split('/');
+        rawName = parts[parts.length - 1] || '';
+    }
+    return slugify(rawName);
+}
+/**
+ * Apply slugification rules to a raw name.
+ *
+ * Steps, in the order the code applies them:
+ * 1. lowercase;
+ * 2. remove every character that is not a-z, 0-9 or "-" (characters are
+ *    deleted, not replaced, so "my_data set" becomes "mydataset");
+ * 3. collapse runs of hyphens into one;
+ * 4. strip leading and trailing hyphens;
+ * 5. if longer than 20 characters, cut to 20 and strip any hyphen the cut
+ *    left at the end.
+ *
+ * The result can be '' when nothing survives step 2.
+ * No type check is done here: a truthy non-string argument reaches
+ * raw.toLowerCase() directly.
+ *
+ * @param {string} raw - Raw name to slugify
+ * @returns {string} Slugified string (at most 20 characters), or '' for falsy input
+ */
+export function slugify(raw) {
+    if (!raw) return '';
+    let slug = raw
+        .toLowerCase()                      // lowercase
+        .replace(/[^a-z0-9-]/g, '')         // delete everything except a-z, 0-9 and hyphens
+        .replace(/-{2,}/g, '-')             // collapse consecutive hyphens into one
+        .replace(/^-+/, '')                 // strip leading hyphens
+        .replace(/-+$/, '');                // strip trailing hyphens
+    // Truncate to 20 chars (done last, after the clean-up above)
+    if (slug.length > 20) {
+        slug = slug.substring(0, 20);
+        // Don't end on a hyphen after truncation
+        slug = slug.replace(/-+$/, '');
+    }
+    return slug;
+}
+/**
+ * Resolve a --from-tune argument to the appropriate config variable name.
+ *
+ * Resolution rules:
+ * - No arg (empty/null) -> TUNE_OUTPUT_PATH_LATEST
+ * - technique only (e.g., "sft") -> TUNE_ADAPTER_PATH_SFT
+ * - technique-dataset compound (e.g., "sft-alpaca") -> TUNE_ADAPTER_PATH_SFT_ALPACA
+ *
+ * Details visible in the implementation:
+ * - The argument is split at its FIRST hyphen: the part before it is the
+ *   technique, everything after it is the slug. Further hyphens inside the
+ *   slug become underscores in the variable name.
+ * - configVarExists is called only for the compound variable name. When it
+ *   returns a falsy value the technique-only variable is returned without
+ *   being checked, and `fallback` carries the compound name that was tried.
+ * - The returned `technique` and `slug` keep the caller's original casing
+ *   and hyphens; only `varName` / `fallback` are upper-cased.
+ *
+ * NOTE(review): an argument that starts with a hyphen (index 0) is treated as
+ * technique-only, so the hyphen ends up inside the variable name
+ * (e.g. "-x" -> "TUNE_ADAPTER_PATH_-X"). Confirm callers never pass this.
+ *
+ * @param {string} fromTuneArg - The --from-tune argument value
+ * @param {function} configVarExists - Function that checks if a config var exists;
+ *   invoked with the compound variable name, and only when the argument contains a hyphen
+ * @returns {{ varName: string, technique: string, slug: string, isCompound: boolean, fallback: string|null }}
+ */
+export function resolveFromTuneVar(fromTuneArg, configVarExists) {
+    if (!fromTuneArg) {
+        return {
+            varName: 'TUNE_OUTPUT_PATH_LATEST',
+            technique: '',
+            slug: '',
+            isCompound: false,
+            fallback: null
+        };
+    }
+    const upper = fromTuneArg.toUpperCase();
+    // Check if argument contains a hyphen — potential compound key (a hyphen at index 0 does not count)
+    const hyphenIndex = fromTuneArg.indexOf('-');
+    if (hyphenIndex > 0) {
+        const technique = fromTuneArg.substring(0, hyphenIndex);
+        const slug = fromTuneArg.substring(hyphenIndex + 1);
+        const techniqueUpper = technique.toUpperCase();
+        const slugUpper = slug.toUpperCase().replace(/-/g, '_');
+        const compoundVar = `TUNE_ADAPTER_PATH_${techniqueUpper}_${slugUpper}`;
+        if (configVarExists(compoundVar)) {
+            return {
+                varName: compoundVar,
+                technique,
+                slug,
+                isCompound: true,
+                fallback: null
+            };
+        }
+        // Compound key doesn't exist — fallback to technique-only (its existence is not checked here)
+        return {
+            varName: `TUNE_ADAPTER_PATH_${techniqueUpper}`,
+            technique,
+            slug,
+            isCompound: false,
+            fallback: compoundVar // the compound var that was tried but didn't exist
+        };
+    }
+    // No hyphen — technique-only; the whole argument, upper-cased, is the variable suffix
+    return {
+        varName: `TUNE_ADAPTER_PATH_${upper}`,
+        technique: fromTuneArg,
+        slug: '',
+        isCompound: false,
+        fallback: null
+    };
+}

package/src/lib/generated/cli-options.js CHANGED Viewed

@@ -1,6 +1,6 @@
 // AUTO-GENERATED by scripts/codegen-cli.js — DO NOT EDIT
 // Source: config/parameter-schema-v2.json
-// Generated: 2026-06-15T20:16:03.840Z
+// Generated: 2026-06-22T13:49:00.815Z
 /**
  * CLI option definitions derived from parameter-schema-v2.json.
@@ -70,7 +70,7 @@ export const cliOptions = [
     {
         'flag': '--enable-lora',
         'description': 'Enable LoRA adapter serving',
-        'defaultValue': false
+        'defaultValue': true
     },
     {
         'flag': '--max-loras <n>',
@@ -85,7 +85,7 @@ export const cliOptions = [
     {
         'flag': '--include-benchmark',
         'description': 'Include SageMaker AI Benchmarking scripts (do/benchmark, do/optimize). Workload configuration is specified at runtime via --workload flag.',
-        'defaultValue': false
+        'defaultValue': true
     },
     {
         'flag': '--benchmark-concurrency <n>',
@@ -353,6 +353,11 @@ export const cliOptions = [
         'description': 'Server env var, repeatable (e.g. SGLANG_MEM_FRACTION=0.9)',
         'repeatable': true
     },
+    {
+        'flag': '--ic-env <KEY=VALUE>',
+        'description': 'Deploy-time environment variable for inference components (IC_ENV_* prefix), repeatable (e.g. VLLM_MAX_MODEL_LEN=8192)',
+        'repeatable': true
+    },
     {
         'flag': '--include-sample',
         'description': 'Include sample model code',
@@ -464,6 +469,7 @@ export const helpGroups = {
     '--fsx-volume-handle': 'hyperpod',
     '--model-env': 'env',
     '--server-env': 'env',
+    '--ic-env': 'ic',
     '--include-sample': 'features',
     '--include-testing': 'features',
     '--test-types': 'features',

package/src/lib/generated/parameter-matrix.js CHANGED Viewed

@@ -1,6 +1,6 @@
 // AUTO-GENERATED by scripts/codegen-parameter-matrix.js — DO NOT EDIT
 // Source: config/parameter-schema-v2.json
-// Generated: 2026-06-15T20:16:03.952Z
+// Generated: 2026-06-22T13:49:00.924Z
 /**
  * Parameter matrix defining how each parameter is loaded from various sources.
@@ -106,7 +106,7 @@ export const parameterMatrix = {
         'mcp': false,
         'promptable': true,
         'required': false,
-        'default': false,
+        'default': true,
         'valueSpace': 'bounded'
     },
     'maxLoras': {
@@ -139,7 +139,7 @@ export const parameterMatrix = {
         'mcp': false,
         'promptable': true,
         'required': false,
-        'default': false,
+        'default': true,
         'valueSpace': 'bounded'
     },
     'benchmarkConcurrency': {
@@ -569,6 +569,17 @@ export const parameterMatrix = {
         'default': null,
         'valueSpace': 'unbounded'
     },
+    'icEnv': {
+        'cliOption': 'ic-env',
+        'envVar': null,
+        'configFile': true,
+        'packageJson': false,
+        'mcp': false,
+        'promptable': false,
+        'required': false,
+        'default': [],
+        'valueSpace': 'unbounded'
+    },
     'includeSampleModel': {
         'cliOption': 'include-sample',
         'envVar': 'ML_INCLUDE_SAMPLE',

package/src/lib/generated/validation-rules.js CHANGED Viewed

@@ -1,6 +1,6 @@
 // AUTO-GENERATED by scripts/codegen-validator.js — DO NOT EDIT
 // Source: config/parameter-schema-v2.json
-// Generated: 2026-06-15T20:16:03.877Z
+// Generated: 2026-06-22T13:49:00.849Z
 /**
  * Validation rules derived from parameter-schema-v2.json.

package/src/lib/mcp-query-runner.js CHANGED Viewed

@@ -216,6 +216,12 @@ export default class McpQueryRunner {
                 if (parsed.choices?.instanceType?.length > 0) {
                     this.runner._instanceSizerMetadata = parsed.metadata || null;
+                    // Store maxModelLen from sizer if context was capped (AC-1.7)
+                    if (parsed.values?.maxModelLen) {
+                        this.runner._sizerMaxModelLen = parsed.values.maxModelLen;
+                        console.log(`   ✓ Context length capped: max_model_len=${parsed.values.maxModelLen}`);
+                    }
                     // Build display labels with VRAM estimate and utilization percentage
                     const recommendations = parsed.metadata?.recommendations || [];
                     const estimatedVramGb = parsed.metadata?.estimatedVramGb;

package/src/lib/prompt-runner.js CHANGED Viewed

@@ -710,6 +710,11 @@ export default class PromptRunner {
             delete combinedAnswers.customHyperPodCluster;
         }
+        // Propagate max_model_len from instance-sizer context capping (AC-1.7)
+        if (this._sizerMaxModelLen) {
+            combinedAnswers.sizerMaxModelLen = this._sizerMaxModelLen;
+        }
         // Apply CUDA version selection → inference AMI override
         if (combinedAnswers._resolvedInferenceAmiVersion) {
             combinedAnswers.inferenceAmiVersion = combinedAnswers._resolvedInferenceAmiVersion;

package/src/lib/prompts/feature-prompts.js CHANGED Viewed

@@ -90,7 +90,7 @@ const loraPrompts = [
         type: 'confirm',
         name: 'enableLora',
         message: 'Enable LoRA adapter serving?',
-        default: false,
+        default: true,
         when: (answers) => {
             const architecture = answers.architecture || answers.deploymentConfig?.split('-')[0];
             const backend = answers.backend || answers.deploymentConfig?.split('-').slice(1).join('-');

package/src/lib/template-manager.js CHANGED Viewed

@@ -314,13 +314,6 @@ export default class TemplateManager {
     _validateBenchmarkConfig() {
         if (!this.answers.includeBenchmark) return;
-        // Gate to supported architectures
-        const dc = this.answers.deploymentConfig;
-        const arch = dc ? dc.split('-')[0] : this.answers.architecture;
-        if (arch !== 'transformers' && arch !== 'diffusors') {
-            throw new Error('⚠️  Benchmarking is only supported with transformers and diffusors architectures.');
-        }
         // Gate to supported deployment targets
         if (this.answers.deploymentTarget === 'hyperpod-eks') {
             throw new Error('⚠️  Benchmarking is only supported with managed-inference, async-inference, and batch-transform deployment targets');

package/src/lib/template-variable-resolver.js CHANGED Viewed

@@ -232,7 +232,7 @@ export async function _ensureTemplateVariables(answers, registryConfigManager =
         artifactUri: '',
         modelLoadStrategy: 'runtime',
         existingEndpointName: null,
-        enableLora: false,
+        enableLora: true,
         maxLoras: 30,
         maxLoraRank: 64
     };
@@ -261,6 +261,20 @@ export async function _ensureTemplateVariables(answers, registryConfigManager =
         }
     }
+    // Always include benchmarking by default (AC-2.3 — enabled for all architectures).
+    // Only set when not explicitly provided by user (AC-2.4, AC-2.7 — respect explicit opt-out).
+    if (answers.includeBenchmark === undefined) {
+        answers.includeBenchmark = true;
+    }
+    // Enforce enableLora scoping: only LoRA-capable servers get enableLora=true
+    // (AC-2.1, NFR-2). All incompatible backends are forced to false.
+    const loraCapableServers = ['vllm', 'sglang', 'djl-lmi', 'lmi', 'djl'];
+    const resolvedBackend = answers.backend || answers.modelServer;
+    if (!loraCapableServers.includes(resolvedBackend)) {
+        answers.enableLora = false;
+    }
     // Merge catalog env vars into answers.envVars with correct precedence
     await _mergeEnvVarsWithPrecedence(answers, registryConfigManager);
@@ -445,6 +459,35 @@ export async function _ensureTemplateVariables(answers, registryConfigManager =
         }
     }
+    // Propagate max_model_len from instance-sizer context capping to env vars (AC-1.7).
+    // The instance-sizer sets sizerMaxModelLen when the model's full context doesn't fit
+    // on the recommended instance. Write as VLLM_MAX_MODEL_LEN or SGLANG_MAX_MODEL_LEN.
+    const _MAX_MODEL_LEN_ENGINE_MAP = {
+        'vllm': 'VLLM_MAX_MODEL_LEN',
+        'vllm-omni': 'VLLM_MAX_MODEL_LEN',
+        'sglang': 'SGLANG_MAX_MODEL_LEN'
+    };
+    if (answers.sizerMaxModelLen) {
+        const maxLenEngine = answers.backend || answers.modelServer;
+        const maxLenEnvKey = maxLenEngine ? _MAX_MODEL_LEN_ENGINE_MAP[maxLenEngine] : null;
+        if (maxLenEnvKey) {
+            // Only set if user hasn't explicitly provided this env var
+            const userServerEnvVars = answers.serverEnvVars || {};
+            const userExplicitlySetMaxLen = (
+                userServerEnvVars['MAX_MODEL_LEN'] !== undefined ||
+                userServerEnvVars[maxLenEnvKey] !== undefined
+            );
+            if (!userExplicitlySetMaxLen && (!answers.envVars || !answers.envVars[maxLenEnvKey])) {
+                if (!answers.envVars) {
+                    answers.envVars = {};
+                }
+                answers.envVars[maxLenEnvKey] = String(answers.sizerMaxModelLen);
+                console.log(`    ℹ️  max_model_len: ${answers.sizerMaxModelLen} (context capped by instance-sizer)`);
+            }
+        }
+    }
     // Determine tune support based on model presence in the tune catalog.
     // Used by the do/config template to write TUNE_SUPPORTED=true|false.
     if (answers.tuneSupported === undefined) {
@@ -481,4 +524,11 @@ export async function _ensureTemplateVariables(answers, registryConfigManager =
             answers.tuneModelId = null;
         }
     }
+    // Propagate --ic-env KEY=VALUE pairs to icEnvVars for do/config template rendering.
+    // These are rendered as IC_ENV_* exports in do/config, which inference-component.sh
+    // reads at deploy time and passes as the Environment field in InferenceComponent.create().
+    if (!answers.icEnvVars) {
+        answers.icEnvVars = {};
+    }
 }

package/src/lib/tune-config-state.js CHANGED Viewed

@@ -74,22 +74,35 @@ export function persistSubmissionState(configPath, { technique, trainingType, da
  * Simulate the config writes that happen after a job completes successfully.
  * This mirrors the behavior in do/tune's _handle_completion() function.
  *
+ * Writes three levels of tracking (AC-4.1, AC-4.2):
+ * - Level 1: TUNE_OUTPUT_PATH_LATEST (always the last run, any technique)
+ * - Level 2: TUNE_ADAPTER_PATH_<TECHNIQUE> (last run per technique)
+ * - Level 3: TUNE_ADAPTER_PATH_<TECHNIQUE>_<SLUG> (per technique + dataset slug)
+ *
  * @param {string} configPath - Path to the config file
  * @param {object} params - Completion parameters
  * @param {string} params.technique - Technique (sft, dpo, rlaif, rlvr)
  * @param {string} params.trainingType - Training type (lora, full-rank)
  * @param {string} params.artifactPath - S3 path to the output artifact
  * @param {string} params.outputType - Output type (adapter, full-model)
+ * @param {string} [params.datasetSlug] - Optional dataset slug for per-technique-per-dataset tracking
  */
-export function persistCompletionState(configPath, { technique, trainingType, artifactPath, outputType }) {
+export function persistCompletionState(configPath, { technique, trainingType, artifactPath, outputType, datasetSlug }) {
     const techniqueUpper = technique.toUpperCase();
     if (trainingType === 'lora') {
+        // Level 2: per-technique — TUNE_ADAPTER_PATH_<TECHNIQUE> is rewritten on every LoRA completion for this technique
         updateConfigVar(configPath, `TUNE_ADAPTER_PATH_${techniqueUpper}`, artifactPath);
+        // Level 3: per-technique + per-dataset (only when a non-empty slug is given); slug is upper-cased and its hyphens become underscores
+        if (datasetSlug) {
+            const slugUpper = datasetSlug.toUpperCase().replace(/-/g, '_');
+            updateConfigVar(configPath, `TUNE_ADAPTER_PATH_${techniqueUpper}_${slugUpper}`, artifactPath);
+        }
     } else if (trainingType === 'full-rank') {
         updateConfigVar(configPath, `TUNE_MODEL_PATH_${techniqueUpper}`, artifactPath);
     }
+    // Level 1: latest — written unconditionally, including when trainingType matches neither branch above
     updateConfigVar(configPath, 'TUNE_OUTPUT_PATH_LATEST', artifactPath);
     updateConfigVar(configPath, 'TUNE_OUTPUT_TYPE_LATEST', outputType);
 }