npm - @aws/ml-container-creator - Versions diffs - 1.0.3 → 1.0.4 - Mend

@aws/ml-container-creator 1.0.3 → 1.0.4

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (44)

package/README.md +1 -1
package/infra/ci-harness/lib/ci-harness-stack.ts +43 -0
package/package.json +2 -2
package/servers/base-image-picker/index.js +65 -18
package/servers/instance-sizer/index.js +32 -0
package/servers/lib/catalogs/fleet-drivers.json +38 -0
package/servers/lib/catalogs/model-arch-support.json +51 -0
package/servers/lib/catalogs/model-servers.json +2842 -1730
package/servers/lib/schemas/image-catalog.schema.json +12 -0
package/src/app.js +6 -4
package/src/lib/generated/cli-options.js +1 -1
package/src/lib/generated/parameter-matrix.js +1 -1
package/src/lib/generated/validation-rules.js +1 -1
package/src/lib/mcp-query-runner.js +110 -3
package/src/lib/prompt-runner.js +66 -22
package/src/lib/template-variable-resolver.js +8 -0
package/src/lib/train-config-builder.js +339 -0
package/templates/do/.benchmark_writer.py +3 -0
package/templates/do/.eval_helper.py +409 -0
package/templates/do/.register_helper.py +185 -11
package/templates/do/.train_build_request.py +102 -113
package/templates/do/.train_helper.py +433 -0
package/templates/do/__pycache__/.register_helper.cpython-312.pyc +0 -0
package/templates/do/adapter +157 -0
package/templates/do/benchmark +60 -3
package/templates/do/deploy.d/managed-inference.ejs +83 -0
package/templates/do/evaluate +272 -0
package/templates/do/lib/resolve-instance.sh +155 -0
package/templates/do/register +5 -0
package/templates/do/test +1 -0
package/templates/do/train +879 -126
package/templates/do/training/config.yaml +83 -11
package/templates/do/training/dpo/accelerate_config.yaml +24 -0
package/templates/do/training/dpo/defaults.yaml +26 -0
package/templates/do/training/dpo/prompts.json +8 -0
package/templates/do/training/dpo/train.py +363 -0
package/templates/do/training/sft/accelerate_config.yaml +22 -0
package/templates/do/training/sft/defaults.yaml +18 -0
package/templates/do/training/sft/prompts.json +7 -0
package/templates/do/training/sft/train.py +310 -0
package/templates/do/tune +11 -2
package/templates/do/.train_poll_parser.py +0 -135
package/templates/do/.train_status_parser.py +0 -187
/package/templates/do/training/{train.py → custom/train.py} +0 -0

package/README.md CHANGED Viewed

@@ -97,7 +97,7 @@ Full documentation is available at [awslabs.github.io/ml-container-creator](http
 ### Python dependencies
-The `do/` lifecycle scripts (`do/tune`, `do/stage`, `do/adapter`) require Python packages. Install them in your Python environment before first use:
+The `do/` lifecycle scripts (`do/tune`, `do/train`, `do/stage`, `do/adapter`) require Python packages. Install them in your Python environment before first use:
 ```bash
 # Recommended (fast):

package/infra/ci-harness/lib/ci-harness-stack.ts CHANGED Viewed

@@ -1057,6 +1057,49 @@ export class MlccCiHarnessStack extends cdk.Stack {
         glueTable.addDependency(glueDatabase);
         glueTable.cfnOptions.condition = benchmarkInfraCondition;
+        // Glue Table: mlcc_evaluations — model quality evaluation results
+        // Written by do/evaluate via .eval_helper.py eval-write subcommand.
+        // Partitioned by model + adapter for efficient comparison queries.
+        const evalGlueTable = new glue.CfnTable(this, 'EvaluationResultsTable', {
+            catalogId: this.account,
+            databaseName: 'mlcc_ci',
+            tableInput: {
+                name: 'mlcc_evaluations',
+                tableType: 'EXTERNAL_TABLE',
+                parameters: {
+                    'classification': 'json',
+                },
+                storageDescriptor: {
+                    columns: [
+                        { name: 'project_name', type: 'string', comment: 'MCC project name' },
+                        { name: 'model_name', type: 'string', comment: 'HuggingFace model ID' },
+                        { name: 'adapter_name', type: 'string', comment: 'Adapter name or IC name' },
+                        { name: 'technique', type: 'string', comment: 'Training technique (sft, dpo)' },
+                        { name: 'eval_dataset', type: 'string', comment: 'Evaluation dataset URI or name' },
+                        { name: 'samples_evaluated', type: 'int', comment: 'Number of samples evaluated' },
+                        { name: 'metrics', type: 'string', comment: 'JSON blob of all computed metrics' },
+                        { name: 'timestamp', type: 'string', comment: 'ISO 8601 UTC timestamp' },
+                        { name: 'region', type: 'string', comment: 'AWS region' },
+                    ],
+                    location: `s3://mlcc-benchmark-results-${this.account}-${this.region}/evaluations/`,
+                    inputFormat: 'org.apache.hadoop.mapred.TextInputFormat',
+                    outputFormat: 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',
+                    serdeInfo: {
+                        serializationLibrary: 'org.openx.data.jsonserde.JsonSerDe',
+                        parameters: {
+                            'serialization.format': '1',
+                        },
+                    },
+                },
+                partitionKeys: [
+                    { name: 'model', type: 'string', comment: 'Model name (partition key)' },
+                    { name: 'adapter', type: 'string', comment: 'Adapter name (partition key)' },
+                ],
+            },
+        });
+        evalGlueTable.addDependency(glueDatabase);
+        evalGlueTable.cfnOptions.condition = benchmarkInfraCondition;
         // Configurable lifecycle parameters for the benchmark results bucket
         const benchmarkIaTransitionDays = new cdk.CfnParameter(this, 'BenchmarkIaTransitionDays', {
             type: 'Number',

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@aws/ml-container-creator",
-  "version": "1.0.3",
+  "version": "1.0.4",
   "description": "Build and deploy custom ML containers on AWS SageMaker with minimal configuration.",
   "main": "src/index.js",
   "bin": {
@@ -88,7 +88,7 @@
   },
   "scripts": {
     "test": "mocha 'test/**/*.test.js' --ignore 'test/property/**' --recursive --timeout 30000 --parallel",
-    "test:property": "mocha 'test/property/**/*.test.js' --recursive --timeout 60000 --parallel",
+    "test:property": "NODE_OPTIONS='--max-old-space-size=8192' mocha 'test/property/**/*.test.js' --recursive --timeout 60000 --parallel --jobs 4",
     "test:all": "npm run test && npm run test:property",
     "test:fast": "mocha 'test/**/*.test.js' --recursive --timeout 15000 --parallel",
     "test:unit": "mocha 'test/unit/**/*.test.js' --recursive --timeout 15000",

package/servers/base-image-picker/index.js CHANGED Viewed

@@ -25,6 +25,8 @@ import { readFileSync } from 'node:fs';
 import { fileURLToPath } from 'node:url';
 import { resolve, dirname } from 'node:path';
 import { DynamicResolver as DynamicResolverBase } from '../lib/dynamic-resolver.js';
+import { filterImages, deriveMinDriverVersion } from '../lib/image-filter.js';
+import { resolveModelArchitecture } from '../lib/model-id-resolver.js';
 // ── Catalog loader ───────────────────────────────────────────────────────────
@@ -156,15 +158,25 @@ class DynamicResolver extends ImageResolver {
             }
             const data = await response.json();
-            const images = (data.results || []).map(tag => ({
-                image: `${this._repoForFramework(framework)}:${tag.name}`,
-                tag: tag.name,
-                architecture: 'amd64',
-                created: tag.last_updated || tag.tag_last_pushed || new Date().toISOString(),
-                labels: {},
-                registry: 'dockerhub',
-                repository: this._repoForFramework(framework)
-            }));
+            const images = (data.results || []).map(tag => {
+                const entry = {
+                    image: `${this._repoForFramework(framework)}:${tag.name}`,
+                    tag: tag.name,
+                    architecture: 'amd64',
+                    created: tag.last_updated || tag.tag_last_pushed || new Date().toISOString(),
+                    labels: {},
+                    registry: 'dockerhub',
+                    repository: this._repoForFramework(framework)
+                };
+                // Derive min_driver_version from CUDA version in tag or labels
+                const minDriver = deriveMinDriverVersion(entry);
+                if (minDriver) {
+                    entry.min_driver_version = minDriver;
+                }
+                return entry;
+            });
             return {
                 images: images.slice(0, limit),
@@ -375,7 +387,9 @@ if (discoverMode) {
  * When discover mode is active, merges static and dynamic results.
  */
 async function resolveBaseImage(context, limit) {
-    const { framework, modelServer, searchCriteria, architecture } = context;
+    const { framework, modelServer, searchCriteria, architecture,
+        instanceType, driverVersion, inferenceAmiVersion,
+        tensorParallelSize, modelArchitecture, modelId } = context;
     // Determine which framework identifier to resolve
     let resolverKey;
@@ -398,21 +412,52 @@ async function resolveBaseImage(context, limit) {
     if (discoverMode && dynamicResolver && dynamicResolver.supportedFrameworks().includes(resolverKey)) {
         // Fetch both static and dynamic results, then merge
-        const staticResult = await staticResolver.fetchImages(resolverKey, { limit, searchCriteria });
+        const staticResult = await staticResolver.fetchImages(resolverKey, { limit: limit * 3, searchCriteria });
         const dynamicResult = await dynamicResolver.fetchImages(resolverKey, { limit: 5 });
-        resultImages = mergeStaticAndDynamic(staticResult.images, dynamicResult.images, limit);
+        resultImages = mergeStaticAndDynamic(staticResult.images, dynamicResult.images, limit * 3);
     } else {
-        // Static-only path (no network calls)
-        const result = await resolver.fetchImages(resolverKey, { limit, searchCriteria });
+        // Static-only path (no network calls) — fetch extra to allow for filtering
+        const fetchLimit = (instanceType || driverVersion || modelArchitecture || modelId) ? limit * 3 : limit;
+        const result = await resolver.fetchImages(resolverKey, { limit: fetchLimit, searchCriteria });
         resultImages = result.images;
     }
+    // ── Resolve modelId → modelArchitecture if needed ───────────────────
+    let resolvedModelArchitecture = modelArchitecture || '';
+    if (!modelArchitecture && modelId) {
+        const arch = await resolveModelArchitecture(modelId);
+        if (arch) {
+            resolvedModelArchitecture = arch;
+        }
+    }
+    // ── Apply driver-aware + model-architecture filtering ─────────────────
+    let filterMetadata = null;
+    if (instanceType || driverVersion || inferenceAmiVersion || resolvedModelArchitecture) {
+        const filterResult = filterImages(resultImages, {
+            framework: resolverKey,
+            instanceType,
+            driverVersion,
+            inferenceAmiVersion,
+            tensorParallelSize: tensorParallelSize || 1,
+            modelArchitecture: resolvedModelArchitecture
+        });
+        resultImages = filterResult.images;
+        filterMetadata = filterResult.metadata;
+    }
+    // Apply final limit after filtering
+    resultImages = resultImages.slice(0, limit);
     const images = resultImages.map(e => e.image);
     return {
         values: { baseImage: images[0] || null },
         choices: { baseImage: images },
-        metadata: { baseImage: resultImages }
+        metadata: {
+            baseImage: resultImages,
+            ...(filterMetadata ? { driverFilter: filterMetadata } : {})
+        }
     };
 }
@@ -432,11 +477,11 @@ const server = new McpServer({
 server.tool(
     'get_base_images',
-    'Returns curated base container images for ML Container Creator Dockerfiles',
+    'Returns curated base container images for ML Container Creator Dockerfiles. Supports driver-aware filtering when instanceType is provided — excludes images incompatible with the fleet GPU driver, especially for multi-GPU tensor-parallel deployments.',
     {
         parameters: z.array(z.string()).describe('List of parameter names to provide values for'),
         limit: z.number().int().positive().default(5).describe('Maximum number of choices per parameter'),
-        context: z.record(z.string(), z.any()).optional().describe('Current configuration context (framework, modelServer, searchCriteria)')
+        context: z.record(z.string(), z.any()).optional().describe('Configuration context. Supports: framework, modelServer, searchCriteria, architecture, instanceType (triggers driver filtering), driverVersion (override), inferenceAmiVersion (resolves to driver), tensorParallelSize (TP>1 = strict filtering), modelId, modelArchitecture (excludes old framework versions)')
     },
     async ({ parameters, limit, context }) => {
         const values = {};
@@ -472,10 +517,12 @@ export {
     TRITON_IMAGE_CATALOG,
     resolveBaseImage,
     mergeStaticAndDynamic,
+    filterImages,
     registry,
     staticResolver,
     dynamicResolver,
-    discoverMode
+    discoverMode,
+    resolveModelArchitecture
 };
 export { DynamicResolverBase as DynamicResolverBase };

package/servers/instance-sizer/index.js CHANGED Viewed

@@ -393,6 +393,38 @@ async function handleGetInstanceRecommendation(params) {
         { limit }
     );
+    // Step 3-recommended: When VRAM filter returns empty but catalog has recommendedInstances,
+    // use those as the fallback (they represent tested/validated deployments).
+    if (recommendations.length === 0 && modelMetadata.recommendedInstances && modelMetadata.recommendedInstances.length > 0) {
+        for (const instanceType of modelMetadata.recommendedInstances) {
+            const meta = effectiveCatalog[instanceType];
+            if (meta) {
+                const perGpuMemory = getPerGpuMemoryGb(meta);
+                const gpuCount = meta.gpus || 1;
+                const totalVramGb = perGpuMemory ? perGpuMemory * gpuCount : null;
+                recommendations.push({
+                    instanceType,
+                    gpuCount,
+                    totalVramGb,
+                    utilizationPercent: totalVramGb ? Math.round((vramEstimate.vramGb / totalVramGb) * 100) : null,
+                    tensorParallelism: gpuCount,
+                    costTier: meta.costTier || null
+                });
+            } else {
+                // Instance not in catalog but listed as recommended — still include it
+                recommendations.push({
+                    instanceType,
+                    gpuCount: null,
+                    totalVramGb: null,
+                    utilizationPercent: null,
+                    tensorParallelism: null,
+                    costTier: null
+                });
+            }
+        }
+        log(`Using catalog recommendedInstances for "${modelName}" (VRAM filter returned empty)`);
+    }
     // Step 3-max_model_len: When no instance fits at full context, try capping context length
     // NFR-1 guard: skip this logic for models with recommendedInstances in catalog
     let suggestedMaxModelLen = null;

package/servers/lib/catalogs/fleet-drivers.json ADDED Viewed

@@ -0,0 +1,38 @@
+{
+  "_comment": "Instance family → GPU driver version mapping for SageMaker inference fleet. Source: AWS docs (inference-gpu-drivers.html) + empirical validation. Updated quarterly or when AWS announces fleet driver updates.",
+  "_last_updated": "2026-06-29",
+  "instance_families": {
+    "g4dn": { "driver": "535.183", "cuda_native": "12.2", "gpu": "T4", "gpu_memory_gb": 16 },
+    "g5":   { "driver": "550.163", "cuda_native": "12.4", "gpu": "A10G", "gpu_memory_gb": 24 },
+    "g5n":  { "driver": "550.163", "cuda_native": "12.4", "gpu": "A10G", "gpu_memory_gb": 24 },
+    "g6":   { "driver": "560.35",  "cuda_native": "12.6", "gpu": "L4", "gpu_memory_gb": 24 },
+    "g6e":  { "driver": "560.35",  "cuda_native": "12.6", "gpu": "L40S", "gpu_memory_gb": 48 },
+    "p4d":  { "driver": "550.163", "cuda_native": "12.4", "gpu": "A100", "gpu_memory_gb": 40 },
+    "p4de": { "driver": "550.163", "cuda_native": "12.4", "gpu": "A100", "gpu_memory_gb": 80 },
+    "p5":   { "driver": "580.95",  "cuda_native": "12.9", "gpu": "H100", "gpu_memory_gb": 80 },
+    "p5e":  { "driver": "580.95",  "cuda_native": "12.9", "gpu": "H200", "gpu_memory_gb": 141 },
+    "trn1": null,
+    "trn2": null,
+    "inf2": null
+  },
+  "ami_versions": {
+    "_comment": "InferenceAmiVersion enum → driver version mapping. From SDK enum + empirical.",
+    "al2-ami-sagemaker-inference-gpu-2":   { "driver": "535.183", "cuda_native": "12.2" },
+    "al2-ami-sagemaker-inference-gpu-2-1": { "driver": "535.216", "cuda_native": "12.2" },
+    "al2-ami-sagemaker-inference-gpu-3-1": { "driver": "550.163", "cuda_native": "12.4" },
+    "al2023-ami-sagemaker-inference-gpu-4-1": { "driver": "570.86", "cuda_native": "12.8" }
+  },
+  "cuda_to_min_driver": {
+    "_comment": "CUDA toolkit version → minimum required driver (Linux data center). Source: NVIDIA CUDA compatibility docs.",
+    "12.0": "525.60",
+    "12.1": "525.60",
+    "12.2": "535.54",
+    "12.3": "535.54",
+    "12.4": "550.54",
+    "12.5": "555.42",
+    "12.6": "560.28",
+    "12.7": "565.57",
+    "12.8": "570.86",
+    "12.9": "580.00"
+  }
+}

package/servers/lib/catalogs/model-arch-support.json ADDED Viewed

@@ -0,0 +1,51 @@
+{
+  "_comment": "Model architecture → minimum framework version mapping. Used by driver-aware filtering to exclude framework versions that don't support a given model architecture. Source: vLLM/SGLang release notes and supported_models docs.",
+  "_last_updated": "2026-06-29",
+  "vllm": {
+    "LlamaForCausalLM": "v0.4.0",
+    "Llama4ForCausalLM": "v0.22.0",
+    "MistralForCausalLM": "v0.4.0",
+    "MixtralForCausalLM": "v0.4.0",
+    "Qwen2ForCausalLM": "v0.6.0",
+    "Qwen2MoeForCausalLM": "v0.6.0",
+    "Qwen3ForCausalLM": "v0.20.0",
+    "Qwen3MoeForCausalLM": "v0.20.0",
+    "DeepseekV2ForCausalLM": "v0.16.0",
+    "DeepseekV3ForCausalLM": "v0.19.0",
+    "Gemma2ForCausalLM": "v0.8.0",
+    "Gemma3ForCausalLM": "v0.20.0",
+    "Gemma4ForCausalLM": "v0.23.0",
+    "GptOssForCausalLM": "v0.22.0",
+    "NemotronForCausalLM": "v0.17.0",
+    "Phi3ForCausalLM": "v0.6.0",
+    "PhiMoEForCausalLM": "v0.16.0",
+    "GraniteForCausalLM": "v0.17.0",
+    "GraniteMoeForCausalLM": "v0.19.0",
+    "CohereForCausalLM": "v0.16.0",
+    "Cohere2ForCausalLM": "v0.19.0",
+    "FalconForCausalLM": "v0.4.0",
+    "StarCoder2ForCausalLM": "v0.6.0",
+    "InternLM2ForCausalLM": "v0.6.0",
+    "OlmoForCausalLM": "v0.16.0",
+    "Olmo2ForCausalLM": "v0.19.0"
+  },
+  "sglang": {
+    "LlamaForCausalLM": "v0.3.0",
+    "MistralForCausalLM": "v0.3.0",
+    "Qwen2ForCausalLM": "v0.4.0",
+    "Qwen3ForCausalLM": "v0.5.0",
+    "Qwen3MoeForCausalLM": "v0.5.0",
+    "DeepseekV2ForCausalLM": "v0.4.0",
+    "DeepseekV3ForCausalLM": "v0.5.0",
+    "Gemma2ForCausalLM": "v0.4.0",
+    "Gemma3ForCausalLM": "v0.5.0",
+    "Phi3ForCausalLM": "v0.4.0",
+    "InternLM2ForCausalLM": "v0.4.0"
+  },
+  "lmi": {
+    "_comment": "DJL LMI uses HuggingFace transformers directly — architecture support is determined by the bundled transformers version, not the LMI version itself. No version-based filtering needed for LMI.",
+    "LlamaForCausalLM": "v0.25.0",
+    "Qwen2ForCausalLM": "v0.27.0",
+    "DeepseekV2ForCausalLM": "v0.28.0"
+  }
+}