npm - @aws/ml-container-creator - Versions diffs - 0.6.1 → 0.8.0 - Mend

@aws/ml-container-creator 0.6.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

package/bin/cli.js +1 -1
package/infra/ci-harness/buildspec.yml +4 -0
package/package.json +1 -1
package/servers/lib/catalogs/model-servers.json +80 -0
package/servers/model-picker/index.js +27 -16
package/src/app.js +89 -21
package/src/lib/cli-handler.js +1 -1
package/src/lib/config-manager.js +39 -2
package/src/lib/cross-cutting-checker.js +146 -33
package/src/lib/deployment-config-resolver.js +10 -4
package/src/lib/e2e-bootstrap.js +227 -0
package/src/lib/e2e-catalog-validator.js +103 -0
package/src/lib/e2e-quota-validator.js +135 -0
package/src/lib/prompt-runner.js +290 -22
package/src/lib/prompts.js +9 -3
package/src/lib/template-manager.js +10 -4
package/src/lib/tune-catalog-validator.js +5 -5
package/templates/Dockerfile +2 -0
package/templates/code/cw_log_forwarder.py +64 -0
package/templates/code/serve +14 -3
package/templates/code/serving.properties +2 -2
package/templates/deploy_notebook_generator.py +897 -0
package/templates/diffusors/serve +3 -3
package/templates/do/.tune_helper.py +2 -2
package/templates/do/export +19 -2
package/templates/do/lib/endpoint-config.sh +3 -1
package/templates/do/lib/inference-component.sh +5 -1
package/templates/do/register +8 -2
package/templates/do/test +5 -5
package/templates/do/tune +2 -2
package/templates/marketplace/config +118 -0
package/templates/marketplace/deploy +890 -0
package/templates/marketplace/test +453 -0

package/templates/diffusors/serve CHANGED Viewed

@@ -9,10 +9,10 @@ echo "Starting vLLM-Omni server (diffusion model serving)"
 # Resolve model URI prefixes that engines cannot handle natively.
 # The generator's model-picker may store provider-specific URIs
-# (e.g. jumpstart://model-txt2img-stabilityai-stable-diffusion-v2-1-base)
-# as the model identifier. vLLM expects a HuggingFace repo ID or local path.
+# (e.g. registry://my-model-group/1) as the model identifier.
+# vLLM expects a HuggingFace repo ID or local path.
 _RAW_MODEL="${VLLM_MODEL:-}"
-if [[ "$_RAW_MODEL" == jumpstart://* ]] || [[ "$_RAW_MODEL" == jumpstart-hub://* ]] || [[ "$_RAW_MODEL" == registry://* ]]; then
+if [[ "$_RAW_MODEL" == registry://* ]]; then
     if [ -d /opt/ml/model ] && [ "$(ls -A /opt/ml/model 2>/dev/null)" ]; then
         echo "Resolved VLLM_MODEL='${_RAW_MODEL}' → /opt/ml/model (local artifacts found)"
         export VLLM_MODEL="/opt/ml/model"

package/templates/do/.tune_helper.py CHANGED Viewed

@@ -176,7 +176,7 @@ def cmd_submit(args):
             )
         elif "ValidationException" in error_msg and "license" in error_msg.lower():
             _error_exit(
-                f"Model license not accepted. Accept the license in JumpStart before "
+                f"Model license not accepted. Accept the model license before "
                 f"using this model for customization. Details: {error_msg}"
             )
         else:
@@ -660,7 +660,7 @@ def main():
     # ── submit ────────────────────────────────────────────────────────────────
     submit_parser = subparsers.add_parser("submit", help="Submit a customization job")
-    submit_parser.add_argument("--model-id", required=True, help="JumpStart model ID")
+    submit_parser.add_argument("--model-id", required=True, help="Model ID")
     submit_parser.add_argument("--technique", required=True,
                                choices=["sft", "dpo", "rlaif", "rlvr"],
                                help="Customization technique")

package/templates/do/export CHANGED Viewed

@@ -2,16 +2,33 @@
 # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 # SPDX-License-Identifier: Apache-2.0
-# Export current configuration as a CLI command or JSON object
-# Usage: ./do/export [--json]
+# Export current configuration as a CLI command, JSON object, or Jupyter notebook
+# Usage: ./do/export [--json | --notebook]
 # Source configuration (suppress the summary output)
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 source "${SCRIPT_DIR}/config" > /dev/null 2>&1
+# ── Notebook output mode ──────────────────────────────────────────────────────
+if [ "${1:-}" = "--notebook" ]; then
+    # Ensure not combined with --json
+    if [ "${2:-}" = "--json" ]; then
+        echo "Error: --notebook and --json are mutually exclusive" >&2
+        exit 1
+    fi
+    python3 "${SCRIPT_DIR}/../deploy_notebook_generator.py"
+    exit 0
+fi
 # ── JSON output mode ─────────────────────────────────────────────────────────
 if [ "${1:-}" = "--json" ]; then
+    # Ensure not combined with --notebook
+    if [ "${2:-}" = "--notebook" ]; then
+        echo "Error: --notebook and --json are mutually exclusive" >&2
+        exit 1
+    fi
     # Build a JSON object with all configuration parameters.
     # Uses ConfigManager camelCase keys so the output can be fed directly
     # back into the generator via --config=<file>.

package/templates/do/lib/endpoint-config.sh CHANGED Viewed

@@ -152,7 +152,9 @@ create_endpoint_config() {
         variant_json="${variant_json}}]"
     else
         # Standard path: single instance type
-        variant_json="[{\"VariantName\":\"AllTraffic\",\"InstanceType\":\"${INSTANCE_TYPE}\",\"InitialInstanceCount\":1"
+        # RoutingConfig is required for IC-based endpoints — without it the IC scheduler
+        # cannot place containers and the IC stays in Creating with no logs.
+        variant_json="[{\"VariantName\":\"AllTraffic\",\"InstanceType\":\"${INSTANCE_TYPE}\",\"InitialInstanceCount\":1,\"RoutingConfig\":{\"RoutingStrategy\":\"LEAST_OUTSTANDING_REQUESTS\"}"
         # Optional: AMI version
         if [ -n "${INFERENCE_AMI_VERSION:-}" ]; then

package/templates/do/lib/inference-component.sh CHANGED Viewed

@@ -46,10 +46,14 @@ create_inference_component() {
     # Build container spec JSON
     local container_spec="{\"Image\":\"${ECR_REPOSITORY}:${IC_IMAGE_TAG:-${PROJECT_NAME}-latest}\""
+    # Always inject IC name for CW log forwarder
+    local ic_env="\"INFERENCE_COMPONENT_NAME\":\"${ic_name}\""
     if [ -n "${CONTAINER_ENV_JSON}${IC_CONTAINER_ENV_EXTRA:-}" ]; then
         local env_json="${CONTAINER_ENV_JSON}"
         [ -n "${IC_CONTAINER_ENV_EXTRA:-}" ] && env_json="${env_json:+${env_json},}${IC_CONTAINER_ENV_EXTRA}"
-        container_spec="${container_spec},\"Environment\":{${env_json}}"
+        container_spec="${container_spec},\"Environment\":{${ic_env},${env_json}}"
+    else
+        container_spec="${container_spec},\"Environment\":{${ic_env}}"
     fi
     container_spec="${container_spec}}"

package/templates/do/register CHANGED Viewed

@@ -191,8 +191,14 @@ fi
 # ============================================================
 # DEPLOYMENT_CONFIG format: <architecture>-<backend> (e.g., transformers-vllm, http-flask, triton-fil)
-ARCHITECTURE="${DEPLOYMENT_CONFIG%%-*}"
-BACKEND="${DEPLOYMENT_CONFIG#*-}"
+# Special case: marketplace has no backend
+if [ "${DEPLOYMENT_CONFIG}" = "marketplace" ]; then
+    ARCHITECTURE="marketplace"
+    BACKEND=""
+else
+    ARCHITECTURE="${DEPLOYMENT_CONFIG%%-*}"
+    BACKEND="${DEPLOYMENT_CONFIG#*-}"
+fi
 echo "📋 Registering deployment to registry"
 echo "   Project: ${PROJECT_NAME}"

package/templates/do/test CHANGED Viewed

@@ -103,9 +103,9 @@ case "${FRAMEWORK}" in
         case "${MODEL_SERVER}" in
             vllm|sglang)
                 # OpenAI-compatible chat completions format
-                # For S3/JumpStart models, vLLM registers the model under the local path
+                # For S3/registry models, vLLM registers the model under the local path
                 VLLM_MODEL_NAME="${MODEL_NAME}"
-                if [[ "${MODEL_NAME}" == jumpstart://* ]] || [[ "${MODEL_NAME}" == jumpstart-hub://* ]] || [[ "${MODEL_NAME}" == s3://* ]] || [[ "${MODEL_NAME}" == /opt/ml/* ]]; then
+                if [[ "${MODEL_NAME}" == s3://* ]] || [[ "${MODEL_NAME}" == /opt/ml/* ]]; then
                     VLLM_MODEL_NAME="/opt/ml/model"
                 fi
                 TEST_PAYLOAD='{"model": "'"${VLLM_MODEL_NAME}"'", "messages": [{"role": "user", "content": "What is machine learning?"}], "max_tokens": 50, "temperature": 0.7}'
@@ -431,7 +431,7 @@ case "${FRAMEWORK}" in
         case "${MODEL_SERVER}" in
             vllm|sglang)
                 VLLM_MODEL_NAME="${MODEL_NAME}"
-                if [[ "${MODEL_NAME}" == jumpstart://* ]] || [[ "${MODEL_NAME}" == jumpstart-hub://* ]] || [[ "${MODEL_NAME}" == s3://* ]] || [[ "${MODEL_NAME}" == /opt/ml/* ]]; then
+                if [[ "${MODEL_NAME}" == s3://* ]] || [[ "${MODEL_NAME}" == /opt/ml/* ]]; then
                     VLLM_MODEL_NAME="/opt/ml/model"
                 fi
                 TEST_PAYLOAD='{"model": "'"${VLLM_MODEL_NAME}"'", "messages": [{"role": "user", "content": "What is machine learning?"}], "max_tokens": 50, "temperature": 0.7}'
@@ -808,7 +808,7 @@ case "${FRAMEWORK}" in
             vllm|sglang)
                 # OpenAI-compatible chat completions format
                 VLLM_MODEL_NAME="${MODEL_NAME}"
-                if [[ "${MODEL_NAME}" == jumpstart://* ]] || [[ "${MODEL_NAME}" == jumpstart-hub://* ]] || [[ "${MODEL_NAME}" == s3://* ]] || [[ "${MODEL_NAME}" == /opt/ml/* ]]; then
+                if [[ "${MODEL_NAME}" == s3://* ]] || [[ "${MODEL_NAME}" == /opt/ml/* ]]; then
                     VLLM_MODEL_NAME="/opt/ml/model"
                 fi
                 TEST_PAYLOAD='{"model": "'"${VLLM_MODEL_NAME}"'", "messages": [{"role": "user", "content": "What is machine learning?"}], "max_tokens": 50, "temperature": 0.7}'
@@ -1095,7 +1095,7 @@ case "${FRAMEWORK}" in
         case "${MODEL_SERVER}" in
             vllm|sglang)
                 VLLM_MODEL_NAME="${MODEL_NAME}"
-                if [[ "${MODEL_NAME}" == jumpstart://* ]] || [[ "${MODEL_NAME}" == jumpstart-hub://* ]] || [[ "${MODEL_NAME}" == s3://* ]] || [[ "${MODEL_NAME}" == /opt/ml/* ]]; then
+                if [[ "${MODEL_NAME}" == s3://* ]] || [[ "${MODEL_NAME}" == /opt/ml/* ]]; then
                     VLLM_MODEL_NAME="/opt/ml/model"
                 fi
                 TEST_PAYLOAD='{"model": "'"${VLLM_MODEL_NAME}"'", "messages": [{"role": "user", "content": "What is machine learning?"}], "max_tokens": 50, "temperature": 0.7}'

package/templates/do/tune CHANGED Viewed

@@ -67,7 +67,7 @@ _parse_args() {
                 ARG_TRAINING_TYPE="$2"; shift 2 ;;
             --model)
                 if [ -z "${2:-}" ]; then
-                    echo "❌ --model requires a JumpStart model ID"
+                    echo "❌ --model requires a model ID"
                     exit 1
                 fi
                 ARG_MODEL="$2"; shift 2 ;;
@@ -287,7 +287,7 @@ for family in sorted(families.keys()):
     for entry in entries:
         techniques = list(entry.get('techniques', {}).keys())
         print(f'    • {entry[\"displayName\"]}')
-        print(f'      ID: {entry[\"jumpStartModelId\"]}')
+        print(f'      ID: {entry[\"modelId\"]}')
         for t in techniques:
             tc = entry['techniques'][t]
             types = ', '.join(tc.get('trainingTypes', []))

package/templates/marketplace/config ADDED Viewed

@@ -0,0 +1,118 @@
+#!/bin/bash
+# do-framework configuration (marketplace)
+# This file is sourced by all do scripts
+#
+# AWS_REGION and INSTANCE_TYPE honor a non-empty value that is already present
+# in the caller's environment and fall back to the generated default otherwise.
+# The fallback must be applied here, at the first assignment: an unconditional
+# export would overwrite the caller's value before any later
+# ${VAR:-default} expansion could observe it, and the derived S3/SNS names
+# further down are built from AWS_REGION as it stands at that point.
+# Project identification
+export PROJECT_NAME="<%= projectName %>"
+export DEPLOYMENT_CONFIG="marketplace"
+# Marketplace model package
+export MODEL_PACKAGE_ARN="<%= modelPackageArn %>"
+# AWS configuration (a value from the environment wins over the generated default)
+export AWS_REGION="${AWS_REGION:-<%= awsRegion %>}"
+# Deployment configuration
+export DEPLOYMENT_TARGET="<%= deploymentTarget %>"
+# Instance type (a value from the environment wins over the generated default)
+export INSTANCE_TYPE="${INSTANCE_TYPE:-<%= instanceType %>}"
+<% if (roleArn) { %>
+# Execution role; only rendered when a role ARN was supplied at generation time
+export ROLE_ARN="<%= roleArn %>"
+<% } %>
+<% if (deploymentTarget === 'async-inference') { %>
+# Async-specific configuration
+# This section is rendered only when the deployment target is async-inference.
+# For the S3 output path and the two SNS topics, a value supplied at generation
+# time is written out literally; otherwise a default is composed when the file
+# is sourced from ACCOUNT_ID, AWS_REGION and PROJECT_NAME.
+# Account lookup: stderr is discarded and the literal "UNKNOWN" is used if the
+# AWS CLI call fails. ACCOUNT_ID is assigned without `export` here.
+ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo "UNKNOWN")
+<% if (asyncS3OutputPath) { %>
+export ASYNC_S3_OUTPUT_PATH="<%= asyncS3OutputPath %>"
+<% } else { %>
+# Default output location: s3://mlcc-async-<account>-<region>/<project>/output/
+export ASYNC_S3_OUTPUT_PATH="s3://mlcc-async-${ACCOUNT_ID}-${AWS_REGION}/${PROJECT_NAME}/output/"
+<% } %>
+<% if (asyncSnsSuccessTopic) { %>
+export ASYNC_SNS_SUCCESS_TOPIC="<%= asyncSnsSuccessTopic %>"
+<% } else { %>
+# Default success topic ARN, named after the project
+export ASYNC_SNS_SUCCESS_TOPIC="arn:aws:sns:${AWS_REGION}:${ACCOUNT_ID}:ml-container-creator-${PROJECT_NAME}-async-success"
+<% } %>
+<% if (asyncSnsErrorTopic) { %>
+export ASYNC_SNS_ERROR_TOPIC="<%= asyncSnsErrorTopic %>"
+<% } else { %>
+# Default error topic ARN, named after the project
+export ASYNC_SNS_ERROR_TOPIC="arn:aws:sns:${AWS_REGION}:${ACCOUNT_ID}:ml-container-creator-${PROJECT_NAME}-async-error"
+<% } %>
+<% if (asyncMaxConcurrentInvocations) { %>
+# No default: the variable is left unset when no value was supplied
+export ASYNC_MAX_CONCURRENT_INVOCATIONS="<%= asyncMaxConcurrentInvocations %>"
+<% } %>
+<% } %>
+<% if (deploymentTarget === 'batch-transform') { %>
+# Batch Transform configuration
+# This section is rendered only when the deployment target is batch-transform.
+# Input and output paths use the value supplied at generation time when one was
+# given; otherwise they are composed when the file is sourced from ACCOUNT_ID,
+# AWS_REGION and PROJECT_NAME.
+# Account lookup: stderr is discarded and the literal "UNKNOWN" is used if the
+# AWS CLI call fails. ACCOUNT_ID is assigned without `export` here.
+ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo "UNKNOWN")
+<% if (batchInputPath) { %>
+export BATCH_INPUT_PATH="<%= batchInputPath %>"
+<% } else { %>
+# Default input location: s3://mlcc-batch-<account>-<region>/<project>/input/
+export BATCH_INPUT_PATH="s3://mlcc-batch-${ACCOUNT_ID}-${AWS_REGION}/${PROJECT_NAME}/input/"
+<% } %>
+<% if (batchOutputPath) { %>
+export BATCH_OUTPUT_PATH="<%= batchOutputPath %>"
+<% } else { %>
+# Default output location: s3://mlcc-batch-<account>-<region>/<project>/output/
+export BATCH_OUTPUT_PATH="s3://mlcc-batch-${ACCOUNT_ID}-${AWS_REGION}/${PROJECT_NAME}/output/"
+<% } %>
+# The next three values are always written out as supplied by the template data
+export BATCH_INSTANCE_COUNT="<%= batchInstanceCount %>"
+export BATCH_SPLIT_TYPE="<%= batchSplitType %>"
+export BATCH_STRATEGY="<%= batchStrategy %>"
+# Falls back to the literal string 'None' when no join source was supplied
+export BATCH_JOIN_SOURCE="<%= batchJoinSource || 'None' %>"
+<% if (batchMaxConcurrentTransforms) { %>
+# No default: the variable is left unset when no value was supplied
+export BATCH_MAX_CONCURRENT_TRANSFORMS="<%= batchMaxConcurrentTransforms %>"
+<% } %>
+<% if (batchMaxPayloadInMB) { %>
+# No default: the variable is left unset when no value was supplied
+export BATCH_MAX_PAYLOAD_IN_MB="<%= batchMaxPayloadInMB %>"
+<% } %>
+<% } %>
+<% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
+# SageMaker AI Benchmarking configuration
+# This section is rendered only when includeBenchmark is defined and truthy in
+# the template data.
+export BENCHMARK_CONCURRENCY="<%= benchmarkConcurrency %>"
+export BENCHMARK_INPUT_TOKENS_MEAN="<%= benchmarkInputTokensMean %>"
+export BENCHMARK_OUTPUT_TOKENS_MEAN="<%= benchmarkOutputTokensMean %>"
+export BENCHMARK_STREAMING="<%= benchmarkStreaming %>"
+<% if (benchmarkRequestCount) { %>
+export BENCHMARK_REQUEST_COUNT="<%= benchmarkRequestCount %>"
+<% } else { %>
+# Exported empty (rather than left unset) when no request count was supplied
+export BENCHMARK_REQUEST_COUNT=""
+<% } %>
+<% if (benchmarkS3OutputPath) { %>
+export BENCHMARK_S3_OUTPUT_PATH="<%= benchmarkS3OutputPath %>"
+<% } else { %>
+# Default output location: s3://mlcc-benchmark-<account>-<region>/<project>/
+# The account lookup uses the same guarded form as the async and batch
+# sections: stderr is discarded and the literal "UNKNOWN" is substituted if the
+# AWS CLI call fails, so the bucket name never ends up with an empty account
+# segment.
+export BENCHMARK_S3_OUTPUT_PATH="s3://mlcc-benchmark-$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo "UNKNOWN")-${AWS_REGION}/${PROJECT_NAME}/"
+<% } %>
+# Exported empty here; nothing in this file assigns them a value
+export BENCHMARK_JOB_NAME=""
+export BENCHMARK_WORKLOAD_CONFIG_NAME=""
+<% } %>
+# Allow environment variable overrides
+export AWS_REGION=${AWS_REGION:-<%= awsRegion %>}
+export INSTANCE_TYPE=${INSTANCE_TYPE:-<%= instanceType %>}
+# Print configuration summary
+# Written to stdout every time this file is sourced. The first block of lines
+# is always printed; the target-specific lines below are rendered only for the
+# matching deployment target.
+echo "⚙️  Configuration loaded"
+echo "   Project: ${PROJECT_NAME}"
+echo "   Config:  ${DEPLOYMENT_CONFIG}"
+echo "   Region:  ${AWS_REGION}"
+echo "   Model package: ${MODEL_PACKAGE_ARN}"
+echo "   Deployment target: ${DEPLOYMENT_TARGET}"
+echo "   Instance: ${INSTANCE_TYPE}"
+<% if (deploymentTarget === 'async-inference') { %>
+# Async target: show the output path and both SNS topics
+echo "   S3 output: ${ASYNC_S3_OUTPUT_PATH}"
+echo "   SNS success: ${ASYNC_SNS_SUCCESS_TOPIC}"
+echo "   SNS error: ${ASYNC_SNS_ERROR_TOPIC}"
+<% } else if (deploymentTarget === 'batch-transform') { %>
+# Batch target: show instance count, S3 paths, split type and strategy
+echo "   Instance count: ${BATCH_INSTANCE_COUNT}"
+echo "   S3 input: ${BATCH_INPUT_PATH}"
+echo "   S3 output: ${BATCH_OUTPUT_PATH}"
+echo "   Split type: ${BATCH_SPLIT_TYPE}"
+echo "   Strategy: ${BATCH_STRATEGY}"
+<% } %>