npm - @aws/ml-container-creator - Versions diffs - 1.0.3 → 1.1.0 - Mend

@aws/ml-container-creator 1.0.3 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (79)

package/README.md +10 -1
package/bin/cli.js +57 -0
package/config/agent.json +16 -0
package/infra/ci-harness/lib/ci-harness-stack.ts +43 -0
package/package.json +5 -2
package/pyproject.toml +3 -0
package/servers/agent-knowledge/index.js +592 -0
package/servers/agent-knowledge/package.json +15 -0
package/servers/base-image-picker/index.js +65 -18
package/servers/instance-sizer/index.js +32 -0
package/servers/lib/catalogs/fleet-drivers.json +38 -0
package/servers/lib/catalogs/model-arch-support.json +51 -0
package/servers/lib/catalogs/model-servers.json +2842 -1730
package/servers/lib/schemas/image-catalog.schema.json +12 -0
package/src/agent/__init__.py +2 -0
package/src/agent/__pycache__/__init__.cpython-312.pyc +0 -0
package/src/agent/__pycache__/config_loader.cpython-312.pyc +0 -0
package/src/agent/__pycache__/context.cpython-312.pyc +0 -0
package/src/agent/__pycache__/health_check.cpython-312.pyc +0 -0
package/src/agent/agent.py +513 -0
package/src/agent/config_loader.py +215 -0
package/src/agent/context.py +380 -0
package/src/agent/data/capability-matrix.json +106 -0
package/src/agent/health_check.py +341 -0
package/src/agent/prompts/system.md +173 -0
package/src/agent/requirements-agent.txt +3 -0
package/src/app.js +6 -4
package/src/lib/generated/cli-options.js +1 -1
package/src/lib/generated/parameter-matrix.js +1 -1
package/src/lib/generated/validation-rules.js +1 -1
package/src/lib/mcp-query-runner.js +110 -3
package/src/lib/prompt-runner.js +66 -22
package/src/lib/template-variable-resolver.js +8 -0
package/src/lib/train-config-builder.js +339 -0
package/src/lib/tune-config-state.js +89 -68
package/templates/do/.benchmark_writer.py +3 -0
package/templates/do/.eval_helper.py +409 -0
package/templates/do/.register_helper.py +185 -11
package/templates/do/.train_build_request.py +102 -113
package/templates/do/.train_helper.py +433 -0
package/templates/do/__pycache__/.register_helper.cpython-312.pyc +0 -0
package/templates/do/adapter +157 -0
package/templates/do/benchmark +60 -3
package/templates/do/config +6 -1
package/templates/do/deploy.d/managed-inference.ejs +83 -0
package/templates/do/evaluate +272 -0
package/templates/do/lib/resolve-instance.sh +155 -0
package/templates/do/register +5 -0
package/templates/do/test +1 -0
package/templates/do/train +879 -126
package/templates/do/training/config.yaml +83 -11
package/templates/do/training/dpo/accelerate_config.yaml +24 -0
package/templates/do/training/dpo/defaults.yaml +26 -0
package/templates/do/training/dpo/prompts.json +8 -0
package/templates/do/training/dpo/train.py +363 -0
package/templates/do/training/sft/accelerate_config.yaml +22 -0
package/templates/do/training/sft/defaults.yaml +18 -0
package/templates/do/training/sft/prompts.json +7 -0
package/templates/do/training/sft/train.py +310 -0
package/templates/do/tune +11 -2
package/src/lib/auto-prompt-builder.js +0 -172
package/src/lib/cli-handler.js +0 -529
package/src/lib/community-reports-validator.js +0 -91
package/src/lib/configuration-exporter.js +0 -204
package/src/lib/dataset-slug.js +0 -152
package/src/lib/docker-introspection-validator.js +0 -51
package/src/lib/known-flags-validator.js +0 -200
package/src/lib/schema-validator.js +0 -157
package/src/lib/train-config-parser.js +0 -136
package/src/lib/train-config-persistence.js +0 -143
package/src/lib/train-config-validator.js +0 -112
package/src/lib/train-feedback.js +0 -46
package/src/lib/train-idempotency.js +0 -97
package/src/lib/train-request-builder.js +0 -120
package/src/lib/tune-dataset-validator.js +0 -279
package/src/lib/tune-output-resolver.js +0 -66
package/templates/do/.train_poll_parser.py +0 -135
package/templates/do/.train_status_parser.py +0 -187
/package/templates/do/training/{train.py → custom/train.py} +0 -0

package/src/lib/train-request-builder.js DELETED

@@ -1,120 +0,0 @@
-// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
-// SPDX-License-Identifier: Apache-2.0
-/**
- * Train Request Builder
- *
- * JavaScript module that replicates the Python helper's (.train_build_request.py)
- * logic for constructing a CreateTrainingJob JSON request from a parsed config.
- *
- * This module mirrors the behavior of the Python build_request() function,
- * providing a testable implementation of the config-to-API mapping logic.
- */
-/**
- * Build a CreateTrainingJob request from a parsed training config.
- *
- * Maps config fields to the SageMaker CreateTrainingJob API structure:
- * - image → AlgorithmSpecification.TrainingImage
- * - instance_type → ResourceConfig.InstanceType
- * - instance_count → ResourceConfig.InstanceCount
- * - dataset → InputDataConfig[0].DataSource.S3DataSource.S3Uri
- * - output_path → OutputDataConfig.S3OutputPath
- * - hyperparameters → HyperParameters (string key-value pairs)
- * - max_runtime_seconds → StoppingCondition.MaxRuntimeInSeconds
- * - enable_spot=true → EnableManagedSpotTraining = true
- * - enable_spot=true → StoppingCondition.MaxWaitTimeInSeconds
- * - checkpoint_path → CheckpointConfig.S3Uri
- * - metric_definitions → AlgorithmSpecification.MetricDefinitions
- * - environment → Environment
- * - tags → Tags (converted from {k:v} to [{Key:k, Value:v}])
- *
- * @param {object} options - Build options
- * @param {string} options.jobName - Training job name
- * @param {string} options.roleArn - SageMaker execution role ARN
- * @param {object} options.config - Parsed training config (from parseTrainingConfig)
- * @returns {object} CreateTrainingJob request body
- */
-export function buildTrainingJobRequest({ jobName, roleArn, config }) {
-    const request = {
-        TrainingJobName: jobName,
-        RoleArn: roleArn,
-        AlgorithmSpecification: {
-            TrainingImage: config.image,
-            TrainingInputMode: 'File'
-        },
-        InputDataConfig: [
-            {
-                ChannelName: 'training',
-                DataSource: {
-                    S3DataSource: {
-                        S3DataType: 'S3Prefix',
-                        S3Uri: config.dataset,
-                        S3DataDistributionType: 'FullyReplicated'
-                    }
-                }
-            }
-        ],
-        OutputDataConfig: {
-            S3OutputPath: config.output_path
-        },
-        ResourceConfig: {
-            InstanceType: config.instance_type,
-            InstanceCount: parseInt(config.instance_count, 10),
-            VolumeSizeInGB: parseInt(config.volume_size_gb, 10)
-        },
-        StoppingCondition: {
-            MaxRuntimeInSeconds: parseInt(config.max_runtime_seconds, 10)
-        }
-    };
-    // Hyperparameters — ensure all values are strings (SageMaker requirement)
-    const hyperparams = config.hyperparameters || {};
-    if (Object.keys(hyperparams).length > 0) {
-        request.HyperParameters = {};
-        for (const [k, v] of Object.entries(hyperparams)) {
-            request.HyperParameters[String(k)] = String(v);
-        }
-    }
-    // Managed spot training
-    const enableSpot = config.enable_spot === 'true' || config.enable_spot === true;
-    if (enableSpot) {
-        request.EnableManagedSpotTraining = true;
-        request.StoppingCondition.MaxWaitTimeInSeconds = parseInt(config.max_wait_seconds, 10);
-    }
-    // Checkpoint configuration (for spot training resumption)
-    const checkpointPath = config.checkpoint_path || '';
-    if (checkpointPath) {
-        request.CheckpointConfig = {
-            S3Uri: checkpointPath
-        };
-    }
-    // Metric definitions (custom CloudWatch metrics)
-    const metricDefs = config.metric_definitions || [];
-    if (Array.isArray(metricDefs) && metricDefs.length > 0) {
-        request.AlgorithmSpecification.MetricDefinitions = metricDefs.map(m => ({
-            Name: m.name,
-            Regex: m.regex
-        }));
-    }
-    // Environment variables for the container
-    const environment = config.environment || {};
-    if (Object.keys(environment).length > 0) {
-        request.Environment = environment;
-    }
-    // Tags — convert from {key: value} map to [{Key: k, Value: v}] array
-    const tags = config.tags || {};
-    if (Object.keys(tags).length > 0) {
-        request.Tags = Object.entries(tags).map(([k, v]) => ({
-            Key: String(k),
-            Value: String(v)
-        }));
-    }
-    return request;
-}

package/src/lib/tune-dataset-validator.js DELETED

@@ -1,279 +0,0 @@
-// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
-// SPDX-License-Identifier: Apache-2.0
-/**
- * Tune Dataset Validator
- *
- * Parses dataset arguments (S3 URIs and Hugging Face references) and
- * validates JSONL dataset lines against catalog-driven schemas.
- *
- * Requirements: 3.1, 3.5, 3.6, 3.7, 3.8, 3.10, 3.11, 3.12
- */
-/**
- * Parse a dataset argument string into a structured object.
- * Accepts S3 URIs (`s3://bucket/key`) or Hugging Face references
- * (`hf://org/name` or `hf://org/name/split`).
- *
- * @param {string} datasetStr - The dataset argument string
- * @returns {{ valid: boolean, type?: string, bucket?: string, key?: string, org?: string, name?: string, split?: string, error?: string }}
- */
-export function parseDatasetArg(datasetStr) {
-    if (!datasetStr || typeof datasetStr !== 'string') {
-        return {
-            valid: false,
-            error: 'Dataset argument is required and must be a non-empty string.'
-        };
-    }
-    const trimmed = datasetStr.trim();
-    if (trimmed.startsWith('s3://')) {
-        return _parseS3Uri(trimmed);
-    }
-    if (trimmed.startsWith('hf://')) {
-        return _parseHfReference(trimmed);
-    }
-    return {
-        valid: false,
-        error: `Invalid dataset format: "${trimmed}". Expected s3://bucket/key or hf://org/name[/split].`
-    };
-}
-/**
- * Validate JSONL lines against a dataset schema from the catalog.
- * Inspects only the first 10 lines per requirement.
- *
- * @param {string[]} lines - Array of JSONL line strings
- * @param {Object} schema - The datasetSchema object from the catalog
- * @param {string[]} schema.required - Array of required top-level keys
- * @param {Object} schema.types - Object mapping key to expected type ("string", "array", "object", "number")
- * @returns {{ valid: boolean, error: string|null, lineNumber: number|null, malformedLine: string|null, expectedFormat: string|null }}
- */
-export function validateDatasetFormat(lines, schema) {
-    if (!lines || !Array.isArray(lines)) {
-        return {
-            valid: false,
-            error: 'Lines must be provided as an array.',
-            lineNumber: null,
-            malformedLine: null,
-            expectedFormat: _buildExpectedFormat(schema)
-        };
-    }
-    if (!schema || !schema.required || !Array.isArray(schema.required)) {
-        return {
-            valid: false,
-            error: 'Schema must include a "required" array of keys.',
-            lineNumber: null,
-            malformedLine: null,
-            expectedFormat: null
-        };
-    }
-    const linesToInspect = lines.slice(0, 10);
-    for (let i = 0; i < linesToInspect.length; i++) {
-        const line = linesToInspect[i];
-        const lineNumber = i + 1;
-        // Skip empty lines
-        if (!line || line.trim() === '') {
-            continue;
-        }
-        // Try to parse as JSON
-        let parsed;
-        try {
-            parsed = JSON.parse(line);
-        } catch (e) {
-            return {
-                valid: false,
-                error: `Line ${lineNumber} is not valid JSON: ${e.message}`,
-                lineNumber,
-                malformedLine: line,
-                expectedFormat: _buildExpectedFormat(schema)
-            };
-        }
-        // Check that parsed value is an object
-        if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
-            return {
-                valid: false,
-                error: `Line ${lineNumber} must be a JSON object.`,
-                lineNumber,
-                malformedLine: line,
-                expectedFormat: _buildExpectedFormat(schema)
-            };
-        }
-        // Check required keys
-        for (const key of schema.required) {
-            if (!Object.hasOwn(parsed, key)) {
-                return {
-                    valid: false,
-                    error: `Line ${lineNumber} is missing required key "${key}".`,
-                    lineNumber,
-                    malformedLine: line,
-                    expectedFormat: _buildExpectedFormat(schema)
-                };
-            }
-        }
-        // Check types if specified
-        if (schema.types) {
-            for (const [key, expectedType] of Object.entries(schema.types)) {
-                if (!Object.hasOwn(parsed, key)) {
-                    continue;
-                }
-                const value = parsed[key];
-                if (!_checkType(value, expectedType)) {
-                    return {
-                        valid: false,
-                        error: `Line ${lineNumber} has key "${key}" with wrong type. Expected "${expectedType}", got "${_getType(value)}".`,
-                        lineNumber,
-                        malformedLine: line,
-                        expectedFormat: _buildExpectedFormat(schema)
-                    };
-                }
-            }
-        }
-    }
-    return {
-        valid: true,
-        error: null,
-        lineNumber: null,
-        malformedLine: null,
-        expectedFormat: null
-    };
-}
-/**
- * Parse an S3 URI into bucket and key components.
- * @param {string} uri - The S3 URI (e.g., "s3://bucket/path/to/file.jsonl")
- * @returns {Object} Parsed result
- * @private
- */
-function _parseS3Uri(uri) {
-    const withoutScheme = uri.slice(5); // Remove "s3://"
-    const slashIndex = withoutScheme.indexOf('/');
-    if (slashIndex === -1 || slashIndex === 0) {
-        return {
-            valid: false,
-            error: `Invalid S3 URI: "${uri}". Expected format: s3://bucket/key.`
-        };
-    }
-    const bucket = withoutScheme.slice(0, slashIndex);
-    const key = withoutScheme.slice(slashIndex + 1);
-    if (!bucket) {
-        return {
-            valid: false,
-            error: `Invalid S3 URI: "${uri}". Bucket name is empty.`
-        };
-    }
-    if (!key) {
-        return {
-            valid: false,
-            error: `Invalid S3 URI: "${uri}". Key path is empty.`
-        };
-    }
-    return {
-        valid: true,
-        type: 's3',
-        bucket,
-        key
-    };
-}
-/**
- * Parse a Hugging Face dataset reference into org, name, and split.
- * Defaults to 'train' split if not specified.
- * @param {string} ref - The HF reference (e.g., "hf://org/name" or "hf://org/name/split")
- * @returns {Object} Parsed result
- * @private
- */
-function _parseHfReference(ref) {
-    const withoutScheme = ref.slice(5); // Remove "hf://"
-    const parts = withoutScheme.split('/');
-    if (parts.length < 2 || !parts[0] || !parts[1]) {
-        return {
-            valid: false,
-            error: `Invalid Hugging Face reference: "${ref}". Expected format: hf://org/name[/split].`
-        };
-    }
-    const org = parts[0];
-    const name = parts[1];
-    const split = parts.length >= 3 && parts[2] ? parts[2] : 'train';
-    return {
-        valid: true,
-        type: 'hf',
-        org,
-        name,
-        split
-    };
-}
-/**
- * Check if a value matches the expected schema type.
- * @param {*} value - The value to check
- * @param {string} expectedType - One of "string", "array", "object", "number"
- * @returns {boolean} True if the value matches the expected type
- * @private
- */
-function _checkType(value, expectedType) {
-    switch (expectedType) {
-    case 'string':
-        return typeof value === 'string';
-    case 'number':
-        return typeof value === 'number';
-    case 'array':
-        return Array.isArray(value);
-    case 'object':
-        return typeof value === 'object' && value !== null && !Array.isArray(value);
-    default:
-        return true;
-    }
-}
-/**
- * Get a human-readable type name for a value.
- * @param {*} value - The value to describe
- * @returns {string} The type name
- * @private
- */
-function _getType(value) {
-    if (value === null) return 'null';
-    if (Array.isArray(value)) return 'array';
-    return typeof value;
-}
-/**
- * Build a human-readable expected format description from a schema.
- * @param {Object} schema - The dataset schema
- * @returns {string|null} Description of expected format
- * @private
- */
-function _buildExpectedFormat(schema) {
-    if (!schema || !schema.required) {
-        return null;
-    }
-    const fields = schema.required.map(key => {
-        const type = schema.types && schema.types[key] ? schema.types[key] : 'any';
-        return `"${key}": <${type}>`;
-    });
-    return `Each line must be a JSON object with: {${fields.join(', ')}}`;
-}

package/src/lib/tune-output-resolver.js DELETED

@@ -1,66 +0,0 @@
-// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
-// SPDX-License-Identifier: Apache-2.0
-/**
- * Tune Output Resolver
- *
- * Detects output type from training type and generates context-aware
- * next-step commands for deploying tune job artifacts.
- *
- * Requirements: 8.3, 8.11
- */
-/**
- * Detect the output type based on the training type used for the job.
- * LoRA training produces adapter weights; full-rank produces a full model.
- *
- * @param {string} trainingType - The training type ('lora' or 'full-rank')
- * @returns {string} The output type: 'adapter' for lora, 'full-model' for full-rank
- */
-export function detectOutputType(trainingType) {
-    if (trainingType === 'lora') {
-        return 'adapter';
-    }
-    if (trainingType === 'full-rank') {
-        return 'full-model';
-    }
-    return 'adapter';
-}
-/**
- * Generate context-aware next-step commands based on the output type.
- *
- * For adapter output:
- *   - Quick path: ./do/adapter add tuned-${technique} --from-tune
- *   - Technique-specific: ./do/adapter add tuned-${technique} --from-tune ${technique}
- *   - Explicit path: ./do/adapter add tuned-${technique} --weights ${artifactPath}
- *
- * For full-model output:
- *   - Deploy as new IC: ./do/add-ic tuned-v1 --from-tune
- *   - Explicit path: ./do/add-ic tuned-v1 --model-data ${artifactPath}
- *   - Replace current base: ./do/deploy --force-ic --model-data ${artifactPath}
- *
- * @param {string} outputType - The output type ('adapter' or 'full-model')
- * @param {string} technique - The technique used (e.g., 'sft', 'dpo')
- * @param {string} artifactPath - The S3 path to the output artifact
- * @returns {string[]} Array of suggested next-step commands
- */
-export function generateNextStepCommands(outputType, technique, artifactPath) {
-    if (outputType === 'adapter') {
-        return [
-            `./do/adapter add tuned-${technique} --from-tune`,
-            `./do/adapter add tuned-${technique} --from-tune ${technique}`,
-            `./do/adapter add tuned-${technique} --weights ${artifactPath}`
-        ];
-    }
-    if (outputType === 'full-model') {
-        return [
-            './do/add-ic tuned-v1 --from-tune',
-            `./do/add-ic tuned-v1 --model-data ${artifactPath}`,
-            `./do/deploy --force-ic --model-data ${artifactPath}`
-        ];
-    }
-    return [];
-}

package/templates/do/.train_poll_parser.py DELETED

@@ -1,135 +0,0 @@
-#!/usr/bin/env python3
-# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
-# SPDX-License-Identifier: Apache-2.0
-"""
-Parse DescribeTrainingJob JSON for the polling loop in do/train.
-Reads JSON from stdin and outputs structured key=value lines for bash consumption:
-  STATUS=<TrainingJobStatus>
-  SECONDARY=<SecondaryStatus>
-  FAILURE_REASON=<FailureReason or empty>
-  DISPLAY=<formatted single-line status display>
-This keeps the bash poll loop simple while handling JSON parsing in Python.
-"""
-import json
-import sys
-from datetime import datetime, timezone
-def format_duration(seconds):
-    """Format seconds into a human-readable duration string."""
-    if seconds is None or seconds < 0:
-        return 'N/A'
-    hours = int(seconds // 3600)
-    minutes = int((seconds % 3600) // 60)
-    secs = int(seconds % 60)
-    if hours > 0:
-        return f'{hours}h {minutes}m {secs}s'
-    elif minutes > 0:
-        return f'{minutes}m {secs}s'
-    else:
-        return f'{secs}s'
-def parse_iso_time(time_str):
-    """Parse an ISO 8601 timestamp string to a datetime object."""
-    if not time_str:
-        return None
-    try:
-        time_str = time_str.replace('Z', '+00:00')
-        return datetime.fromisoformat(time_str)
-    except (ValueError, TypeError):
-        return None
-def calculate_elapsed(start_time_str):
-    """Calculate elapsed time from start to now."""
-    start = parse_iso_time(start_time_str)
-    if not start:
-        return None
-    now = datetime.now(timezone.utc)
-    elapsed = (now - start).total_seconds()
-    return max(0, elapsed)
-def format_metrics(final_metrics):
-    """Format FinalMetricDataList into a compact string."""
-    if not final_metrics:
-        return ''
-    parts = []
-    for metric in final_metrics:
-        name = metric.get('MetricName', 'unknown')
-        value = metric.get('Value', 0)
-        if isinstance(value, float):
-            if abs(value) < 0.001:
-                parts.append(f'{name}={value:.6f}')
-            elif abs(value) < 1:
-                parts.append(f'{name}={value:.4f}')
-            else:
-                parts.append(f'{name}={value:.2f}')
-        else:
-            parts.append(f'{name}={value}')
-    return ', '.join(parts)
-# Status emoji mapping
-STATUS_EMOJI = {
-    'InProgress': '🔄',
-    'Completed': '✅',
-    'Failed': '❌',
-    'Stopping': '⏸️',
-    'Stopped': '⏹️'
-}
-def main():
-    """Parse DescribeTrainingJob JSON from stdin and output structured lines."""
-    try:
-        job_data = json.load(sys.stdin)
-    except json.JSONDecodeError as e:
-        print(f'Error parsing JSON: {e}', file=sys.stderr)
-        sys.exit(1)
-    status = job_data.get('TrainingJobStatus', 'Unknown')
-    secondary_status = job_data.get('SecondaryStatus', '')
-    failure_reason = job_data.get('FailureReason', '')
-    training_start = job_data.get('TrainingStartTime', '')
-    final_metrics = job_data.get('FinalMetricDataList', [])
-    # Calculate elapsed time
-    elapsed_str = ''
-    if training_start:
-        elapsed = calculate_elapsed(training_start)
-        if elapsed is not None:
-            elapsed_str = format_duration(elapsed)
-    # Format metrics
-    metrics_str = format_metrics(final_metrics)
-    # Build display line
-    emoji = STATUS_EMOJI.get(status, '❓')
-    display_parts = [f'   {emoji} {status}']
-    if secondary_status:
-        display_parts.append(f'| {secondary_status}')
-    if elapsed_str:
-        display_parts.append(f'| elapsed: {elapsed_str}')
-    if metrics_str:
-        display_parts.append(f'| {metrics_str}')
-    display_line = ' '.join(display_parts)
-    # Output structured lines for bash
-    print(f'STATUS={status}')
-    print(f'SECONDARY={secondary_status}')
-    print(f'FAILURE_REASON={failure_reason}')
-    print(f'DISPLAY={display_line}')
-if __name__ == '__main__':
-    main()