npm - @aws/ml-container-creator - Versions diffs - 0.8.0 → 0.9.0 - Mend

@aws/ml-container-creator 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

package/LICENSE-THIRD-PARTY +50760 -16218
package/package.json +3 -1
package/servers/lib/catalogs/instances.json +52 -1275
package/servers/lib/catalogs/models.json +0 -132
package/servers/lib/catalogs/popular-diffusors.json +1 -110
package/src/app.js +24 -2
package/src/lib/mcp-client.js +16 -1
package/src/lib/mcp-command-handler.js +10 -2
package/src/lib/prompt-runner.js +16 -2
package/src/lib/train-config-parser.js +136 -0
package/src/lib/train-config-persistence.js +143 -0
package/src/lib/train-config-validator.js +112 -0
package/src/lib/train-feedback.js +46 -0
package/src/lib/train-idempotency.js +97 -0
package/src/lib/train-request-builder.js +120 -0
package/templates/do/.train_build_request.py +141 -0
package/templates/do/.train_poll_parser.py +135 -0
package/templates/do/.train_status_parser.py +187 -0
package/templates/do/lib/feedback.sh +41 -0
package/templates/do/train +786 -0
package/templates/do/training/config.yaml +140 -0
package/templates/do/training/train.py +463 -0

package/servers/lib/catalogs/models.json CHANGED Viewed

@@ -555,98 +555,6 @@
             "text-generation"
         ]
     },
-    "stabilityai/stable-diffusion-3.5-medium": {
-        "family": "stable-diffusion-3",
-        "gated": false,
-        "tags": [
-            "image-generation",
-            "diffusion",
-            "stable-diffusion"
-        ],
-        "architecture": "StableDiffusion3Pipeline",
-        "profiles": {
-            "default": {
-                "displayName": "SD3.5 Medium",
-                "envVars": {}
-            }
-        },
-        "notes": "Stable Diffusion 3.5 medium model. Supported natively by vLLM-Omni StableDiffusion3Pipeline.",
-        "chatTemplate": null,
-        "frameworkCompatibility": {
-            "vllm-omni": ">=0.14.0"
-        },
-        "validationLevel": "experimental",
-        "modelType": "diffusor",
-        "tasks": [
-            "text-to-image"
-        ]
-    },
-    "black-forest-labs/FLUX.1-dev": {
-        "family": "flux",
-        "gated": true,
-        "tags": [
-            "image-generation",
-            "diffusion",
-            "flux"
-        ],
-        "architecture": "FluxPipeline",
-        "profiles": {
-            "default": {
-                "displayName": "FLUX.1 Dev",
-                "envVars": {}
-            }
-        },
-        "notes": "FLUX.1-dev high-quality generation model. Uses dual text encoders (CLIP + T5) and FlowMatchEuler scheduler. Requires significant VRAM.",
-        "chatTemplate": null,
-        "frameworkCompatibility": {
-            "vllm-omni": ">=0.14.0"
-        },
-        "validationLevel": "experimental",
-        "modelType": "diffusor",
-        "tasks": [
-            "text-to-image"
-        ]
-    },
-    "black-forest-labs/FLUX.1-schnell": {
-        "family": "flux",
-        "gated": false,
-        "tags": [
-            "image-generation",
-            "diffusion",
-            "flux"
-        ],
-        "architecture": "FluxPipeline",
-        "notes": "FLUX.1-schnell fast generation model. Fewer denoising steps for faster inference at slightly lower quality",
-        "chatTemplate": null,
-        "frameworkCompatibility": {
-            "vllm-omni": ">=0.14.0"
-        },
-        "validationLevel": "experimental",
-        "modelType": "diffusor",
-        "tasks": [
-            "text-to-image"
-        ]
-    },
-    "Wan-AI/Wan2.1-T2V-14B-Diffusers": {
-        "family": "wan",
-        "gated": false,
-        "tags": [
-            "video-generation",
-            "diffusion",
-            "wan"
-        ],
-        "architecture": "WanPipeline",
-        "notes": "Wan2.1 text-to-video 14B model (diffusers format). Requires multi-GPU instance (ml.g5.12xlarge or larger). Must use the -Diffusers variant — the base Wan2.1-T2V-14B repo lacks model_index.json required by vLLM-Omni",
-        "chatTemplate": null,
-        "frameworkCompatibility": {
-            "vllm-omni": ">=0.16.0"
-        },
-        "validationLevel": "experimental",
-        "modelType": "diffusor",
-        "tasks": [
-            "text-to-video"
-        ]
-    },
     "meta-llama/Llama-3*": {
         "family": "llama-3",
         "gated": true,
@@ -731,45 +639,5 @@
         "tasks": [
             "text-generation"
         ]
-    },
-    "stabilityai/stable-diffusion-*": {
-        "family": "stable-diffusion",
-        "gated": false,
-        "tags": [
-            "image-generation",
-            "diffusion",
-            "stable-diffusion"
-        ],
-        "architecture": null,
-        "notes": "Fallback for Stable Diffusion variants not explicitly listed",
-        "chatTemplate": null,
-        "frameworkCompatibility": {
-            "vllm-omni": ">=0.14.0"
-        },
-        "validationLevel": "experimental",
-        "modelType": "diffusor",
-        "tasks": [
-            "text-to-image"
-        ]
-    },
-    "black-forest-labs/FLUX*": {
-        "family": "flux",
-        "gated": false,
-        "tags": [
-            "image-generation",
-            "diffusion",
-            "flux"
-        ],
-        "architecture": null,
-        "notes": "Fallback for FLUX model variants not explicitly listed",
-        "chatTemplate": null,
-        "frameworkCompatibility": {
-            "vllm-omni": ">=0.14.0"
-        },
-        "validationLevel": "experimental",
-        "modelType": "diffusor",
-        "tasks": [
-            "text-to-image"
-        ]
     }
 }

package/servers/lib/catalogs/popular-diffusors.json CHANGED Viewed

@@ -1,110 +1 @@
-{
-    "stabilityai/stable-diffusion-3.5-medium": {
-        "family": "stable-diffusion-3",
-        "chat_template": null,
-        "gated": false,
-        "tags": [
-            "image-generation",
-            "diffusion",
-            "stable-diffusion"
-        ],
-        "architecture": "StableDiffusion3Pipeline",
-        "framework_compatibility": {
-            "vllm-omni": ">=0.14.0"
-        },
-        "validation_level": "experimental",
-        "profiles": {
-            "default": {
-                "displayName": "SD3.5 Medium",
-                "envVars": {}
-            }
-        },
-        "notes": "Stable Diffusion 3.5 medium model. Supported natively by vLLM-Omni StableDiffusion3Pipeline."
-    },
-    "black-forest-labs/FLUX.1-dev": {
-        "family": "flux",
-        "chat_template": null,
-        "gated": true,
-        "tags": [
-            "image-generation",
-            "diffusion",
-            "flux"
-        ],
-        "architecture": "FluxPipeline",
-        "framework_compatibility": {
-            "vllm-omni": ">=0.14.0"
-        },
-        "validation_level": "experimental",
-        "profiles": {
-            "default": {
-                "displayName": "FLUX.1 Dev",
-                "envVars": {}
-            }
-        },
-        "notes": "FLUX.1-dev high-quality generation model. Uses dual text encoders (CLIP + T5) and FlowMatchEuler scheduler. Requires significant VRAM."
-    },
-    "black-forest-labs/FLUX.1-schnell": {
-        "family": "flux",
-        "chat_template": null,
-        "gated": false,
-        "tags": [
-            "image-generation",
-            "diffusion",
-            "flux"
-        ],
-        "architecture": "FluxPipeline",
-        "framework_compatibility": {
-            "vllm-omni": ">=0.14.0"
-        },
-        "validation_level": "experimental",
-        "notes": "FLUX.1-schnell fast generation model. Fewer denoising steps for faster inference at slightly lower quality"
-    },
-    "Wan-AI/Wan2.1-T2V-14B-Diffusers": {
-        "family": "wan",
-        "chat_template": null,
-        "gated": false,
-        "tags": [
-            "video-generation",
-            "diffusion",
-            "wan"
-        ],
-        "architecture": "WanPipeline",
-        "framework_compatibility": {
-            "vllm-omni": ">=0.16.0"
-        },
-        "validation_level": "experimental",
-        "notes": "Wan2.1 text-to-video 14B model (diffusers format). Requires multi-GPU instance (ml.g5.12xlarge or larger). Must use the -Diffusers variant — the base Wan2.1-T2V-14B repo lacks model_index.json required by vLLM-Omni"
-    },
-    "stabilityai/stable-diffusion-*": {
-        "family": "stable-diffusion",
-        "chat_template": null,
-        "gated": false,
-        "tags": [
-            "image-generation",
-            "diffusion",
-            "stable-diffusion"
-        ],
-        "architecture": null,
-        "framework_compatibility": {
-            "vllm-omni": ">=0.14.0"
-        },
-        "validation_level": "experimental",
-        "notes": "Fallback for Stable Diffusion variants not explicitly listed"
-    },
-    "black-forest-labs/FLUX*": {
-        "family": "flux",
-        "chat_template": null,
-        "gated": false,
-        "tags": [
-            "image-generation",
-            "diffusion",
-            "flux"
-        ],
-        "architecture": null,
-        "framework_compatibility": {
-            "vllm-omni": ">=0.14.0"
-        },
-        "validation_level": "experimental",
-        "notes": "Fallback for FLUX model variants not explicitly listed"
-    }
-}
+{}

package/src/app.js CHANGED Viewed

@@ -349,11 +349,27 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
     }
     // Exclude tune files when framework is NOT transformers OR deploymentTarget is batch-transform
-    if (architecture !== 'transformers' || answers.deploymentTarget === 'batch-transform') {
+    const tuneIncluded = architecture === 'transformers' && answers.deploymentTarget !== 'batch-transform';
+    if (!tuneIncluded) {
         ignorePatterns.push('**/do/tune');
         ignorePatterns.push('**/do/.tune_helper.py');
     }
+    // Exclude train files when deploymentTarget is batch-transform
+    const trainIncluded = answers.deploymentTarget !== 'batch-transform';
+    if (!trainIncluded) {
+        ignorePatterns.push('**/do/train');
+        ignorePatterns.push('**/do/.train_build_request.py');
+        ignorePatterns.push('**/do/.train_status_parser.py');
+        ignorePatterns.push('**/do/.train_poll_parser.py');
+        ignorePatterns.push('**/do/training/**');
+    }
+    // Exclude feedback.sh when neither tune nor train is included
+    if (!tuneIncluded && !trainIncluded) {
+        ignorePatterns.push('**/do/lib/feedback.sh');
+    }
     // Exclude do/test when hosted-model-endpoint is not selected
     const testTypes = answers.testTypes || [];
     if (!testTypes.includes('hosted-model-endpoint')) {
@@ -371,6 +387,11 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
         ignorePatterns.push('**/do/adapters/**');
         ignorePatterns.push('**/do/tune');
         ignorePatterns.push('**/do/.tune_helper.py');
+        ignorePatterns.push('**/do/train');
+        ignorePatterns.push('**/do/.train_build_request.py');
+        ignorePatterns.push('**/do/.train_status_parser.py');
+        ignorePatterns.push('**/do/.train_poll_parser.py');
+        ignorePatterns.push('**/do/training/**');
         ignorePatterns.push('**/do/add-ic');
         ignorePatterns.push('**/do/run');
         ignorePatterns.push('**/sample_model/**');
@@ -1177,7 +1198,8 @@ function _setExecutablePermissions(destDir, answers = {}) {
         'do/status',
         'do/add-ic',
         'do/adapter',
-        'do/tune'
+        'do/tune',
+        'do/train'
     ];
     const shellScripts = architecture === 'marketplace' ? marketplaceScripts : defaultScripts;

package/src/lib/mcp-client.js CHANGED Viewed

@@ -14,6 +14,12 @@
 import { Client } from '@modelcontextprotocol/sdk/client/index.js';
 import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';
+import path from 'path';
+import { fileURLToPath } from 'url';
+const __mcp_filename = fileURLToPath(import.meta.url);
+const __mcp_dirname = path.dirname(__mcp_filename);
+const PACKAGE_ROOT = path.resolve(__mcp_dirname, '../..');
 const DEFAULT_TOOL_NAME = 'get_ml_config';
 const DEFAULT_LIMIT = 10;
@@ -96,6 +102,15 @@ class McpClient {
     async _executeQuery() {
         const { command, args = [], env } = this.serverConfig;
+        // Resolve relative paths in args against the package root
+        const resolvedArgs = args.map(arg => {
+            if (arg && !path.isAbsolute(arg) && !arg.startsWith('-')) {
+                const resolved = path.resolve(PACKAGE_ROOT, arg);
+                return resolved;
+            }
+            return arg;
+        });
         // Build environment: merge process.env with server-specific env
         // When --smart flag is active, inject BEDROCK_SMART=true for this run
         // Discover mode is now default; inject DISCOVER_MODE=false only when explicitly disabled
@@ -108,7 +123,7 @@ class McpClient {
         // Create stdio transport — spawns the server process
         this._transport = new StdioClientTransport({
             command,
-            args,
+            args: resolvedArgs,
             env: spawnEnv,
             stderr: 'pipe'
         });

package/src/lib/mcp-command-handler.js CHANGED Viewed

@@ -91,8 +91,12 @@ export default class McpCommandHandler {
             const installed = await this._installBundledDependencies(resolved.serverDir, name);
             if (!installed) return;
+            // Store path relative to package root for portability
+            const packageRoot = path.resolve(__dirname, '../..');
+            const relativePath = path.relative(packageRoot, resolved.entryPoint);
             command = 'node';
-            commandArgs = [resolved.entryPoint];
+            commandArgs = [relativePath];
         } else {
             // Find the '--' separator to split name from command
             const separatorIndex = positionalArgs.indexOf('--');
@@ -195,9 +199,13 @@ export default class McpCommandHandler {
             const installed = await this._installBundledDependencies(resolved.serverDir, server.name);
             if (!installed) continue;
+            // Store path relative to package root for portability across machines
+            const packageRoot = path.resolve(__dirname, '../..');
+            const relativePath = path.relative(packageRoot, resolved.entryPoint);
             config.mcpServers[server.name] = {
                 command: 'node',
-                args: [resolved.entryPoint]
+                args: [relativePath]
             };
             added++;
         }

package/src/lib/prompt-runner.js CHANGED Viewed

@@ -50,6 +50,20 @@ const __pr_filename = fileURLToPath(import.meta.url);
 const __pr_dirname = path.dirname(__pr_filename);
 const GENERATOR_ROOT = path.resolve(__pr_dirname, '..', '..');
+/**
+ * Resolve MCP server args — converts relative paths to absolute using GENERATOR_ROOT.
+ *
+ * Each truthy entry that is not already absolute and does not start with '-'
+ * is replaced by path.resolve(GENERATOR_ROOT, entry). Flags, absolute paths
+ * and falsy entries are returned unchanged. A missing or falsy `args` yields
+ * an empty array, and the input array is not mutated (map returns a new one).
+ *
+ * NOTE(review): the test is purely syntactic, so a bare option value such as
+ * the `fast` in `--mode fast` is also resolved as if it were a path. Confirm
+ * that mcp.json args never carry non-path positional values.
+ * NOTE(review): entries are assumed to be strings; a truthy non-string entry
+ * reaches path.isAbsolute / startsWith without any guard.
+ *
+ * @param {string[]} args - The args array from mcp.json serverConfig
+ * @returns {string[]} Args with relative paths resolved
+ */
+function resolveMcpArgs(args) {
+    return (args || []).map(arg => {
+        if (arg && !path.isAbsolute(arg) && !arg.startsWith('-')) {
+            return path.resolve(GENERATOR_ROOT, arg);
+        }
+        return arg;
+    });
+}
 export default class PromptRunner {
     constructor({ configManager, options, registryConfigManager, baseConfig, promptFn }) {
         this.configManager = configManager;
@@ -1384,7 +1398,7 @@ export default class PromptRunner {
             const { Client } = await import('@modelcontextprotocol/sdk/client/index.js');
             const { StdioClientTransport } = await import('@modelcontextprotocol/sdk/client/stdio.js');
-            const serverArgs = [...(serverConfig.args || [])];
+            const serverArgs = [...resolveMcpArgs(serverConfig.args)];
             if (!discover && !serverArgs.includes('--no-discover')) {
                 serverArgs.push('--no-discover');
             }
@@ -1939,7 +1953,7 @@ export default class PromptRunner {
                             const transport = new StdioClientTransport({
                                 command: serverConfig.command,
-                                args: serverConfig.args || [],
+                                args: resolveMcpArgs(serverConfig.args),
                                 env: { ...process.env, ...(serverConfig.env || {}) },
                                 stderr: 'pipe'
                             });

package/src/lib/train-config-parser.js ADDED Viewed

@@ -0,0 +1,136 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Train Config Parser
+ *
+ * JavaScript module that replicates the YAML config parsing logic from
+ * do/train's _parse_config_python() function. Parses do/training/config.yaml
+ * and extracts all supported fields into a structured object.
+ *
+ * This module mirrors the behavior of both the yq and Python fallback paths
+ * in the bash script, providing a testable implementation of the parsing logic.
+ */
+import { readFileSync } from 'node:fs';
+import yaml from 'js-yaml';
+/**
+ * Default values for optional fields, matching the bash script defaults.
+ *
+ * Scalar defaults are stored as strings, the same form toStringValue produces
+ * for parsed scalars. The complex-field defaults (hyperparameters,
+ * metric_definitions, environment, tags) are an empty object or array.
+ *
+ * NOTE: this object is not frozen, and parseTrainingConfigFromString returns
+ * the complex-field defaults by reference rather than as copies, so callers
+ * should treat those returned values as read-only.
+ */
+const DEFAULTS = {
+    instance_count: '1',
+    max_runtime_seconds: '86400',
+    volume_size_gb: '50',
+    enable_spot: 'false',
+    max_wait_seconds: '172800',
+    checkpoint_path: '',
+    hyperparameters: {},
+    metric_definitions: [],
+    environment: {},
+    tags: {}
+};
+/**
+ * Convert a value to its string representation, matching the Python helper's
+ * `s()` function behavior in _parse_config_python.
+ *
+ * Conversion rules, in order:
+ * - null or undefined: defaultVal is returned as-is (it is not coerced)
+ * - boolean: the literal string 'true' or 'false'
+ * - anything else: String(val), so 0 becomes '0' and '' stays ''
+ *
+ * Only null and undefined trigger the default; other falsy values are
+ * converted like any other value.
+ *
+ * @param {*} val - The value to convert
+ * @param {string} defaultVal - Default value if val is null/undefined
+ *   (the empty string when omitted)
+ * @returns {string} String representation
+ */
+function toStringValue(val, defaultVal = '') {
+    if (val === null || val === undefined) {
+        return defaultVal;
+    }
+    if (typeof val === 'boolean') {
+        return val ? 'true' : 'false';
+    }
+    return String(val);
+}
+/**
+ * Parse a training config YAML file and extract all supported fields.
+ *
+ * This mirrors the behavior of _parse_config_python() in do/train:
+ * - Scalar fields are converted to strings
+ * - Boolean fields are converted to "true"/"false" strings
+ * - Missing optional fields get default values
+ * - Complex fields (hyperparameters, metric_definitions, environment, tags)
+ *   are kept as their native types (objects/arrays)
+ *
+ * The file is read synchronously as UTF-8 and the content is handed to
+ * parseTrainingConfigFromString, which does the actual field extraction.
+ * No required-field validation happens here: a missing required field comes
+ * back as an empty string.
+ *
+ * @param {string} configPath - Path to the YAML config file
+ * @returns {object} Parsed config with all supported fields
+ * @throws {Error} If the file cannot be read or parsed
+ */
+export function parseTrainingConfig(configPath) {
+    const content = readFileSync(configPath, 'utf8');
+    return parseTrainingConfigFromString(content);
+}
+/**
+ * Parse a training config from a YAML string.
+ * Useful for testing without file I/O.
+ *
+ * Behavior visible in the body:
+ * - An empty document (any falsy result from yaml.load) is treated as an
+ *   empty config, so every field takes its default.
+ * - Scalar fields go through toStringValue; only null/undefined fall back to
+ *   the default ('' for the required fields, DEFAULTS.* for the others).
+ * - instance_count sits with the required fields but defaults to
+ *   DEFAULTS.instance_count.
+ * - Complex fields use `||`, so any falsy value (not only a missing key)
+ *   falls back to the matching DEFAULTS entry. That entry is returned by
+ *   reference, not copied, so callers must not mutate it.
+ * - Keys other than the fifteen read below are ignored.
+ *
+ * NOTE(review): yaml.load is safe by default only in js-yaml 4 and later;
+ * confirm the pinned js-yaml version before feeding it untrusted input.
+ *
+ * @param {string} yamlContent - YAML content string
+ * @returns {object} Parsed config with all supported fields
+ * @throws {Error} If the YAML cannot be parsed
+ */
+export function parseTrainingConfigFromString(yamlContent) {
+    const cfg = yaml.load(yamlContent) || {};
+    return {
+        // Required fields (empty string if missing)
+        image: toStringValue(cfg.image, ''),
+        script: toStringValue(cfg.script, ''),
+        instance_type: toStringValue(cfg.instance_type, ''),
+        instance_count: toStringValue(cfg.instance_count, DEFAULTS.instance_count),
+        dataset: toStringValue(cfg.dataset, ''),
+        output_path: toStringValue(cfg.output_path, ''),
+        // Optional scalar fields with defaults
+        max_runtime_seconds: toStringValue(cfg.max_runtime_seconds, DEFAULTS.max_runtime_seconds),
+        volume_size_gb: toStringValue(cfg.volume_size_gb, DEFAULTS.volume_size_gb),
+        enable_spot: toStringValue(cfg.enable_spot, DEFAULTS.enable_spot),
+        max_wait_seconds: toStringValue(cfg.max_wait_seconds, DEFAULTS.max_wait_seconds),
+        checkpoint_path: toStringValue(cfg.checkpoint_path, DEFAULTS.checkpoint_path),
+        // Complex fields (objects/arrays)
+        hyperparameters: cfg.hyperparameters || DEFAULTS.hyperparameters,
+        metric_definitions: cfg.metric_definitions || DEFAULTS.metric_definitions,
+        environment: cfg.environment || DEFAULTS.environment,
+        tags: cfg.tags || DEFAULTS.tags
+    };
+}
+/**
+ * List of all supported fields in the training config.
+ *
+ * Holds the same fifteen keys, in the same order, as the object returned by
+ * parseTrainingConfigFromString. The list is maintained by hand, so a field
+ * added to the parser must be added here as well.
+ */
+export const SUPPORTED_FIELDS = [
+    'image',
+    'script',
+    'instance_type',
+    'instance_count',
+    'dataset',
+    'output_path',
+    'max_runtime_seconds',
+    'volume_size_gb',
+    'enable_spot',
+    'max_wait_seconds',
+    'checkpoint_path',
+    'hyperparameters',
+    'metric_definitions',
+    'environment',
+    'tags'
+];
+/**
+ * List of required fields that must be non-empty.
+ *
+ * This module only exports the list; the parser above does not enforce it
+ * and returns '' for a required field that is missing. instance_count is
+ * not listed here because the parser gives it a default value.
+ */
+export const REQUIRED_FIELDS = [
+    'image',
+    'script',
+    'instance_type',
+    'dataset',
+    'output_path'
+];