npm - @aws/ml-container-creator - Versions diffs - 0.10.0 → 0.12.1 - Mend

@aws/ml-container-creator 0.10.0 → 0.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (90)

package/LICENSE-THIRD-PARTY +9304 -0
package/bin/cli.js +2 -0
package/config/bootstrap-e2e-stack.json +341 -0
package/config/bootstrap-stack.json +40 -3
package/config/parameter-schema-v2.json +33 -22
package/config/tune-catalog.json +1781 -0
package/infra/ci-harness/buildspec.yml +1 -0
package/infra/ci-harness/lambda/path-prover/brain.ts +306 -0
package/infra/ci-harness/lambda/path-prover/write-results.ts +152 -0
package/infra/ci-harness/lib/ci-harness-stack.ts +851 -7
package/infra/ci-harness/state-machines/path-prover.asl.json +496 -0
package/package.json +53 -67
package/servers/base-image-picker/index.js +121 -121
package/servers/e2e-status/index.js +297 -0
package/servers/e2e-status/manifest.json +14 -0
package/servers/e2e-status/package.json +15 -0
package/servers/endpoint-picker/LICENSE +202 -0
package/servers/endpoint-picker/index.js +536 -0
package/servers/endpoint-picker/manifest.json +14 -0
package/servers/endpoint-picker/package.json +18 -0
package/servers/hyperpod-cluster-picker/index.js +125 -125
package/servers/instance-sizer/index.js +166 -153
package/servers/instance-sizer/lib/instance-ranker.js +120 -76
package/servers/instance-sizer/lib/model-resolver.js +61 -61
package/servers/instance-sizer/lib/quota-resolver.js +113 -113
package/servers/instance-sizer/lib/vram-estimator.js +31 -31
package/servers/lib/bedrock-client.js +38 -38
package/servers/lib/catalogs/instances.json +27 -0
package/servers/lib/catalogs/model-servers.json +201 -3
package/servers/lib/custom-validators.js +13 -13
package/servers/lib/dynamic-resolver.js +4 -4
package/servers/marketplace-picker/index.js +342 -0
package/servers/marketplace-picker/manifest.json +14 -0
package/servers/marketplace-picker/package.json +18 -0
package/servers/model-picker/index.js +382 -382
package/servers/region-picker/index.js +56 -56
package/servers/workload-picker/LICENSE +202 -0
package/servers/workload-picker/catalogs/workload-profiles.json +67 -0
package/servers/workload-picker/index.js +171 -0
package/servers/workload-picker/manifest.json +16 -0
package/servers/workload-picker/package.json +16 -0
package/src/app.js +12 -3
package/src/lib/bootstrap-command-handler.js +609 -15
package/src/lib/bootstrap-config.js +36 -0
package/src/lib/bootstrap-profile-manager.js +48 -41
package/src/lib/ci-register-helpers.js +74 -0
package/src/lib/config-loader.js +3 -0
package/src/lib/config-manager.js +7 -0
package/src/lib/config-validator.js +1 -1
package/src/lib/cuda-resolver.js +17 -8
package/src/lib/generated/cli-options.js +319 -314
package/src/lib/generated/parameter-matrix.js +672 -661
package/src/lib/generated/validation-rules.js +76 -72
package/src/lib/path-prover-brain.js +664 -0
package/src/lib/prompts/infrastructure-prompts.js +2 -2
package/src/lib/prompts/model-prompts.js +6 -0
package/src/lib/prompts/project-prompts.js +12 -0
package/src/lib/secrets-prompt-runner.js +4 -0
package/src/lib/template-manager.js +1 -1
package/src/lib/template-variable-resolver.js +87 -1
package/src/lib/tune-catalog-validator.js +37 -4
package/templates/Dockerfile +9 -0
package/templates/code/adapter_sidecar.py +444 -0
package/templates/code/serve +6 -0
package/templates/code/serve.d/vllm.ejs +1 -1
package/templates/do/.benchmark_writer.py +1476 -0
package/templates/do/.tune_helper.py +982 -57
package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
package/templates/do/adapter +154 -0
package/templates/do/benchmark +639 -85
package/templates/do/build +5 -0
package/templates/do/clean.d/async-inference.ejs +5 -0
package/templates/do/clean.d/batch-transform.ejs +5 -0
package/templates/do/clean.d/hyperpod-eks.ejs +5 -0
package/templates/do/clean.d/managed-inference.ejs +5 -0
package/templates/do/config +115 -45
package/templates/do/deploy.d/async-inference.ejs +30 -3
package/templates/do/deploy.d/batch-transform.ejs +29 -3
package/templates/do/deploy.d/hyperpod-eks.ejs +4 -0
package/templates/do/deploy.d/managed-inference.ejs +216 -14
package/templates/do/lib/endpoint-config.sh +1 -1
package/templates/do/lib/profile.sh +44 -0
package/templates/do/optimize +106 -37
package/templates/do/push +5 -0
package/templates/do/register +94 -0
package/templates/do/stage +567 -0
package/templates/do/submit +7 -0
package/templates/do/test +14 -0
package/templates/do/tune +382 -59
package/templates/do/validate +44 -4

package/templates/do/deploy.d/managed-inference.ejs CHANGED Viewed

@@ -10,9 +10,11 @@ set -o pipefail
 FORCE_NEW=false
 FORCE_IC=false
 IC_TARGET=""
+CI_FLAG=false
 while [ $# -gt 0 ]; do
     case "$1" in
         --force) FORCE_NEW=true; shift ;;
+        --ci) CI_FLAG=true; shift ;;
         --force-ic)
             FORCE_IC=true
             shift
@@ -32,13 +34,14 @@ while [ $# -gt 0 ]; do
             shift 2
             ;;
         --help|-h)
-            echo "Usage: ./do/deploy [--force] [--force-ic [<name>]] [--ic <name>]"
+            echo "Usage: ./do/deploy [--force] [--force-ic [<name>]] [--ic <name>] [--ci]"
             echo ""
             echo "Options:"
             echo "  --force            Create a new endpoint and IC, even if one already exists."
             echo "  --force-ic         Recreate ALL inference components on the existing endpoint."
             echo "  --force-ic <name>  Recreate only the named IC on the existing endpoint."
             echo "  --ic <name>        Deploy only the named IC (from do/ic/<name>.conf)."
+            echo "  --ci               Enable CI mode (structured errors, timeouts, idempotency)."
             echo ""
             echo "Without flags, deploy resumes from the last run."
             exit 0
@@ -51,9 +54,169 @@ while [ $# -gt 0 ]; do
     esac
 done
+# ============================================================
+# CI-Mode Detection and Configuration
+# ============================================================
+# CI mode is activated by CI_MODE=true env var OR --ci flag
+if [ "${CI_MODE:-false}" = "true" ] || [ "${CI_FLAG}" = "true" ]; then
+    CI_ACTIVE=true
+else
+    CI_ACTIVE=false
+fi
+# CI-mode timeout configuration (seconds)
+if [ "${CI_ACTIVE}" = "true" ]; then
+    CI_DEPLOY_TIMEOUT="${CI_DEPLOY_TIMEOUT_SECONDS:-1200}"
+    CI_DEPLOY_START=$(date +%s)
+fi
+# _ci_emit_error <error_message> <error_type> <retryable>
+#   Reports a fatal deployment error and terminates the script with status 1.
+#   CI mode (CI_ACTIVE=true): prints one JSON object on stdout with the keys
+#   error, error_type, instance_type, region, retryable and elapsed_seconds
+#   (seconds since CI_DEPLOY_START).
+#   Non-CI mode: prints the human-readable line "❌ <error_message>".
+#   <retryable> is emitted as a bare JSON literal, so anything other than
+#   "true" is reported as false.
+_ci_emit_error() {
+    local error_msg="$1"
+    local error_type="$2"
+    local retryable="$3"
+    local elapsed=0
+    local json_msg
+    if [ "${CI_ACTIVE}" = "true" ]; then
+        elapsed=$(( $(date +%s) - CI_DEPLOY_START ))
+        # Callers pass raw AWS CLI output, which can contain quotes, backslashes
+        # and newlines. Escape it so the emitted line is always valid JSON.
+        # Backslashes go first so the escapes added below are not re-escaped.
+        json_msg=${error_msg//\\/\\\\}
+        json_msg=${json_msg//\"/\\\"}
+        json_msg=${json_msg//$'\n'/\\n}
+        json_msg=${json_msg//$'\r'/\\r}
+        json_msg=${json_msg//$'\t'/\\t}
+        # Any control character still present is illegal inside a JSON string.
+        json_msg=${json_msg//[[:cntrl:]]/ }
+        if [ "${retryable}" != "true" ]; then
+            retryable="false"
+        fi
+        echo "{\"error\":\"${json_msg}\",\"error_type\":\"${error_type}\",\"instance_type\":\"${INSTANCE_TYPE:-unknown}\",\"region\":\"${AWS_REGION:-unknown}\",\"retryable\":${retryable},\"elapsed_seconds\":${elapsed}}"
+        exit 1
+    else
+        echo "❌ ${error_msg}"
+        exit 1
+    fi
+}
+# _ci_check_timeout
+#   Enforces the CI-mode deployment time budget; does nothing outside CI mode.
+#   Once the seconds elapsed since CI_DEPLOY_START reach CI_DEPLOY_TIMEOUT,
+#   a retryable "timeout" error is handed to _ci_emit_error.
+_ci_check_timeout() {
+    # Outside CI mode there is no budget to enforce.
+    [ "${CI_ACTIVE}" = "true" ] || return 0
+    local elapsed_secs=$(( $(date +%s) - CI_DEPLOY_START ))
+    # Still inside the budget: nothing to report.
+    if ! [ "${elapsed_secs}" -ge "${CI_DEPLOY_TIMEOUT}" ]; then
+        return 0
+    fi
+    _ci_emit_error "Deployment timed out after ${elapsed_secs} seconds (limit: ${CI_DEPLOY_TIMEOUT}s)" "timeout" "true"
+}
+# _ci_create_endpoint_with_retry
+#   Wraps CreateEndpoint with exponential backoff for throttling.
+#   Base: 5 seconds, max 3 attempts.
+_ci_create_endpoint_with_retry() {
+    local attempt=0
+    local max_attempts=3
+    local backoff=5
+    local ep_name="$1"
+    local ep_config="$2"
+    while [ "${attempt}" -lt "${max_attempts}" ]; do
+        attempt=$(( attempt + 1 ))
+        local create_output
+        create_output=$(aws sagemaker create-endpoint \
+            --endpoint-name "${ep_name}" \
+            --endpoint-config-name "${ep_config}" \
+            --region "${AWS_REGION}" 2>&1) && return 0
+        # Check if throttled
+        if echo "${create_output}" | grep -qi "ThrottlingException"; then
+            if [ "${attempt}" -lt "${max_attempts}" ]; then
+                if [ "${CI_ACTIVE}" = "true" ]; then
+                    echo "⏳ Throttled (attempt ${attempt}/${max_attempts}), retrying in ${backoff}s..."
+                else
+                    echo "⏳ Throttled, retrying in ${backoff}s..."
+                fi
+                sleep "${backoff}"
+                backoff=$(( backoff * 2 ))
+            else
+                _ci_emit_error "CreateEndpoint throttled after ${max_attempts} attempts" "throttled" "true"
+            fi
+        elif echo "${create_output}" | grep -qi "InsufficientInstanceCapacity"; then
+            _ci_emit_error "InsufficientInstanceCapacity: Unable to provision ${INSTANCE_TYPE} in ${AWS_REGION}" "capacity" "true"
+        else
+            # Other API error
+            _ci_emit_error "CreateEndpoint failed: ${create_output}" "api_error" "false"
+        fi
+    done
+}
+# _ci_handle_existing_endpoint
+#   CI-mode idempotency gate for the endpoint named by ENDPOINT_NAME.
+#   Returns 0 when deployment can be skipped: the endpoint is InService and
+#   INFERENCE_COMPONENT_NAME is set and reports InService as well.
+#   Returns 1 in every other case, meaning the caller should deploy.
+#   A Failed/OutOfService endpoint is deleted first (waiting up to 5 minutes
+#   for it to disappear) and ENDPOINT_NAME is cleared so a new one is created.
+#   NOTE(review): only the two statuses are inspected; no endpoint-config
+#   comparison happens here despite the "matching config" message.
+_ci_handle_existing_endpoint() {
+    local ep_name="${ENDPOINT_NAME:-}"
+    # No endpoint recorded: proceed with a fresh deploy.
+    [ -n "${ep_name}" ] || return 1
+    local ep_status
+    ep_status=$(_get_endpoint_status "${ep_name}" 2>/dev/null || echo "")
+    if [ "${ep_status}" = "InService" ]; then
+        local ic_name="${INFERENCE_COMPONENT_NAME:-}"
+        local ic_status=""
+        if [ -n "${ic_name}" ]; then
+            ic_status=$(_get_ic_status "${ic_name}" 2>/dev/null || echo "")
+        fi
+        if [ "${ic_status}" = "InService" ]; then
+            echo "✅ [CI] Endpoint InService with matching config — skipping deployment"
+            echo "   Endpoint: ${ep_name}"
+            echo "   Inference Component: ${ic_name}"
+            return 0
+        fi
+        return 1
+    fi
+    if [ "${ep_status}" != "Failed" ] && [ "${ep_status}" != "OutOfService" ]; then
+        # Unknown/absent state (including a failed status lookup): deploy.
+        return 1
+    fi
+    echo "⚠️  [CI] Endpoint in bad state (${ep_status}): ${ep_name}"
+    echo "   Deleting endpoint for fresh deployment..."
+    # Best effort: a failed delete call is ignored and the poll below decides.
+    aws sagemaker delete-endpoint \
+        --endpoint-name "${ep_name}" \
+        --region "${AWS_REGION}" 2>/dev/null || true
+    # Poll until the status lookup comes back empty, i.e. the endpoint is gone.
+    local wait_started
+    wait_started=$(date +%s)
+    local wait_limit=300  # 5 minutes
+    local current_status
+    local waited
+    while true; do
+        _ci_check_timeout
+        current_status=$(_get_endpoint_status "${ep_name}" 2>/dev/null || echo "")
+        if [ -z "${current_status}" ]; then
+            echo "   ✅ Endpoint deleted: ${ep_name}"
+            break
+        fi
+        waited=$(( $(date +%s) - wait_started ))
+        if [ "${waited}" -ge "${wait_limit}" ]; then
+            # NOTE(review): the message reports the state seen before deletion
+            # started, not the status returned by the latest poll.
+            _ci_emit_error "Endpoint deletion timed out for ${ep_name} (state: ${ep_status})" "endpoint_failed" "true"
+        fi
+        sleep 10
+    done
+    # Clear endpoint name so fresh deploy proceeds
+    ENDPOINT_NAME=""
+    return 1
+}
 # Source configuration
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 source "${SCRIPT_DIR}/config"
+source "${SCRIPT_DIR}/lib/profile.sh"
+# ── Profile-resolved variables (env var > profile > default) ──────────────────
+ROLE_ARN="${ROLE_ARN:-${_PROFILE[roleArn]:-}}"
+ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
+export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
 echo "🚀 Deploying to AWS"
 echo "   Project: ${PROJECT_NAME}"
@@ -150,14 +313,14 @@ source "${SCRIPT_DIR}/lib/inference-component.sh"
 # Validate execution role ARN
 if [ -z "${ROLE_ARN:-}" ]; then
-    echo "❌ Execution role ARN not provided"
+    echo "❌ ROLE_ARN is not set."
+    echo "   Run 'ml-container-creator bootstrap' to configure your profile,"
+    echo "   or set ROLE_ARN as an environment variable."
     echo ""
     echo "Usage:"
     echo "  export ROLE_ARN=arn:aws:iam::ACCOUNT_ID:role/YOUR_ROLE"
     echo "  ./do/deploy"
     echo ""
-    echo "Or set ROLE_ARN in do/config"
-    echo ""
     echo "The execution role must have permissions for:"
     echo "  • SageMaker endpoint and inference component management"
     echo "  • ECR image access"
@@ -193,6 +356,31 @@ fi
 # Resolve container secrets (HF_TOKEN, NGC_API_KEY)
 resolve_secrets
+<% if (typeof serverEnvVars !== 'undefined' && serverEnvVars && Object.keys(serverEnvVars).length > 0) { %>
+# ============================================================
+# Inject server environment variables into container Environment
+# ============================================================
+<% Object.keys(serverEnvVars).forEach(function(key) { %>
+if [ -n "${<%= key %>:-}" ]; then
+    if [ -n "${CONTAINER_ENV_JSON}" ]; then
+        CONTAINER_ENV_JSON="${CONTAINER_ENV_JSON},\"<%= key %>\":\"${<%= key %>}\""
+    else
+        CONTAINER_ENV_JSON="\"<%= key %>\":\"${<%= key %>}\""
+    fi
+fi
+<% }); %>
+<% } %>
+# ============================================================
+# CI-Mode: Idempotent deployment check (runs before normal idempotency)
+# ============================================================
+if [ "${CI_ACTIVE}" = "true" ] && [ "${FORCE_NEW}" != "true" ]; then
+    if _ci_handle_existing_endpoint; then
+        # Endpoint already InService with matching config — exit successfully
+        exit 0
+    fi
+fi
 # ============================================================
 # Idempotency: check for existing deployment from a previous run
 # ============================================================
@@ -380,16 +568,20 @@ if [ -z "${SKIP_TO}" ]; then
         # Step 2: Create endpoint
         echo "🚀 Creating endpoint: ${ENDPOINT_NAME}"
-        if ! aws sagemaker create-endpoint \
-            --endpoint-name "${ENDPOINT_NAME}" \
-            --endpoint-config-name "${ENDPOINT_CONFIG_NAME}" \
-            --region "${AWS_REGION}"; then
-            echo "❌ Failed to create endpoint"
-            echo "   Check that:"
-            echo "   • Your IAM credentials have sagemaker:CreateEndpoint permission"
-            echo "   • You have sufficient service quota in region: ${AWS_REGION}"
-            exit 4
+        if [ "${CI_ACTIVE}" = "true" ]; then
+            _ci_create_endpoint_with_retry "${ENDPOINT_NAME}" "${ENDPOINT_CONFIG_NAME}"
+        else
+            if ! aws sagemaker create-endpoint \
+                --endpoint-name "${ENDPOINT_NAME}" \
+                --endpoint-config-name "${ENDPOINT_CONFIG_NAME}" \
+                --region "${AWS_REGION}"; then
+                echo "❌ Failed to create endpoint"
+                echo "   Check that:"
+                echo "   • Your IAM credentials have sagemaker:CreateEndpoint permission"
+                echo "   • You have sufficient service quota in region: ${AWS_REGION}"
+                exit 4
+            fi
         fi
         echo "✅ Endpoint creation initiated: ${ENDPOINT_NAME}"
@@ -413,8 +605,18 @@ if [ -z "${SKIP_TO}" ] || [ "${SKIP_TO}" = "wait_endpoint" ]; then
     echo "   This may take a few minutes..."
     echo "   If this times out, re-run ./do/deploy to resume."
+    # CI-mode: check timeout during wait
+    if [ "${CI_ACTIVE}" = "true" ]; then
+        _ci_check_timeout
+    fi
     wait_endpoint "${ENDPOINT_NAME}"
+    # CI-mode: check timeout after wait completes
+    if [ "${CI_ACTIVE}" = "true" ]; then
+        _ci_check_timeout
+    fi
     echo "✅ Endpoint is InService: ${ENDPOINT_NAME}"
 fi

package/templates/do/lib/endpoint-config.sh CHANGED Viewed

@@ -165,7 +165,7 @@ create_endpoint_config() {
         # Optional: capacity reservation
         if [ -n "${CAPACITY_RESERVATION_ARN:-}" ]; then
             variant_json="${variant_json},\"CapacityReservationConfig\":{\"CapacityReservationPreference\":\"capacity-reservations-only\",\"MlReservationArn\":\"${CAPACITY_RESERVATION_ARN}\"}"
-            echo "   ⚠️  Capacity reservation (experimental): ${CAPACITY_RESERVATION_ARN}"
+            echo "   ✓ Capacity reservation: ${CAPACITY_RESERVATION_ARN}"
         fi
         variant_json="${variant_json}}]"

package/templates/do/lib/profile.sh ADDED Viewed

@@ -0,0 +1,44 @@
+#!/usr/bin/env bash
+# Profile loader — reads active bootstrap profile into _PROFILE[] associative array.
+# Source this file after do/config. Values provide defaults; explicit env vars take precedence.
+#
+# Requires bash 4+ for associative array support.
+# macOS ships with bash 3.2 — install bash 4+ via Homebrew: brew install bash
+#
+# Expected keys in _PROFILE:
+#   awsRegion, accountId, awsProfile, roleArn, ecrRepositoryName,
+#   benchmarkS3Bucket, ciBenchmarkResultsBucket, asyncS3Bucket, batchS3Bucket,
+#   ciTableName, ciInfraProvisioned
+# Temporarily disable unbound variable checking for profile loading
+# (keys may not exist in the profile config, and declare -A behavior
+# varies across bash versions with set -u)
+set +u 2>/dev/null || true
+declare -A _PROFILE 2>/dev/null || true
+if command -v python3 &>/dev/null; then
+    _PROFILE_RAW=$(python3 -c "
+import json, os
+try:
+    with open(os.path.expanduser('~/.ml-container-creator/config.json')) as f:
+        c = json.load(f)
+    p = c['profiles'][c['activeProfile']]
+    # Output as KEY=VALUE lines (simple, no JSON parsing in bash)
+    for k, v in p.items():
+        if isinstance(v, (str, int, float, bool)):
+            print(f'{k}={v}')
+except:
+    pass
+" 2>/dev/null) || _PROFILE_RAW=""
+    if [ -n "${_PROFILE_RAW}" ]; then
+        while IFS='=' read -r key value; do
+            [ -n "${key}" ] && _PROFILE["${key}"]="${value}"
+        done <<< "${_PROFILE_RAW}"
+    fi
+fi
+# NOTE: set -u is NOT re-enabled here. The caller is responsible for managing
+# their own shell options. Re-enabling set -u would cause "unbound variable"
+# errors when accessing _PROFILE keys on bash versions where empty associative
+# arrays are treated as unset (bash 5.x on some platforms).

package/templates/do/optimize CHANGED Viewed

@@ -106,6 +106,30 @@ elif [ -n "${INSTANCE_POOLS:-}" ]; then
 elif [ -n "${INSTANCE_TYPE:-}" ]; then
     INSTANCE_TYPES="${INSTANCE_TYPE}"
     echo "   Instances (from INSTANCE_TYPE): ${INSTANCE_TYPES}"
+elif [ "${ENDPOINT_EXTERNAL:-}" = "true" ] && [ -n "${ENDPOINT_NAME:-}" ]; then
+    # External endpoint — query the live endpoint config for instance type
+    echo "   Discovering instance type from external endpoint: ${ENDPOINT_NAME}"
+    ENDPOINT_CONFIG_NAME=$(aws sagemaker describe-endpoint \
+        --endpoint-name "${ENDPOINT_NAME}" \
+        --region "${AWS_REGION}" \
+        --query 'EndpointConfigName' \
+        --output text 2>/dev/null) || ENDPOINT_CONFIG_NAME=""
+    if [ -n "${ENDPOINT_CONFIG_NAME}" ]; then
+        INSTANCE_TYPES=$(aws sagemaker describe-endpoint-config \
+            --endpoint-config-name "${ENDPOINT_CONFIG_NAME}" \
+            --region "${AWS_REGION}" \
+            --query 'ProductionVariants[0].InstanceType' \
+            --output text 2>/dev/null) || INSTANCE_TYPES=""
+    fi
+    if [ -n "${INSTANCE_TYPES}" ] && [ "${INSTANCE_TYPES}" != "None" ]; then
+        echo "   Instances (from endpoint): ${INSTANCE_TYPES}"
+    else
+        echo "❌ Could not discover instance type from endpoint: ${ENDPOINT_NAME}"
+        echo "   Provide --instances flag, or set INSTANCE_TYPE in do/config."
+        exit 1
+    fi
 else
     echo "❌ No instance types available."
     echo "   Provide --instances flag, or set INSTANCE_POOLS or INSTANCE_TYPE in do/config."
@@ -132,9 +156,9 @@ RESUME_EXISTING=false
 if [ "${FORCE}" = false ] && [ -n "${OPTIMIZE_JOB_NAME:-}" ]; then
     EXISTING_STATUS=$(aws sagemaker describe-ai-recommendation-job \
-        --job-name "${OPTIMIZE_JOB_NAME}" \
+        --ai-recommendation-job-name "${OPTIMIZE_JOB_NAME}" \
         --region "${AWS_REGION}" \
-        --query 'Status' \
+        --query 'AIRecommendationJobStatus' \
         --output text 2>/dev/null) || EXISTING_STATUS=""
     case "${EXISTING_STATUS}" in
@@ -154,7 +178,7 @@ if [ "${FORCE}" = false ] && [ -n "${OPTIMIZE_JOB_NAME:-}" ]; then
             ;;
         FAILED|STOPPED)
             FAILURE_REASON=$(aws sagemaker describe-ai-recommendation-job \
-                --job-name "${OPTIMIZE_JOB_NAME}" \
+                --ai-recommendation-job-name "${OPTIMIZE_JOB_NAME}" \
                 --region "${AWS_REGION}" \
                 --query 'FailureReason' \
                 --output text 2>/dev/null) || FAILURE_REASON="unknown"
@@ -174,44 +198,86 @@ fi
 # ── Create recommendation job ─────────────────────────────────────────────────
 if [ "${RESUME_EXISTING}" = false ]; then
     OPTIMIZE_JOB_NAME="${PROJECT_NAME}-optimize-$(date +%Y%m%d-%H%M%S)"
+    WORKLOAD_CONFIG_NAME="${OPTIMIZE_JOB_NAME}-workload"
     echo "🚀 Creating AI Recommendation Job: ${OPTIMIZE_JOB_NAME}"
-    # Build instance type list as JSON array
-    INSTANCE_TYPES_JSON="["
-    FIRST=true
+    # Step 1: Create workload config
+    echo "   Creating workload config: ${WORKLOAD_CONFIG_NAME}"
+    WORKLOAD_SPEC_INNER="{\"benchmark\": {\"type\": \"aiperf\"}, \"parameters\": {\"prompt_input_tokens_mean\": ${INPUT_TOKENS}, \"prompt_input_tokens_stddev\": 150, \"output_tokens_mean\": ${OUTPUT_TOKENS}, \"output_tokens_stddev\": 50, \"concurrency\": ${CONCURRENCY}, \"streaming\": true}}"
+    WORKLOAD_SPEC_OUTER="{\"WorkloadSpec\": {\"Inline\": $(python3 -c "import json; print(json.dumps('${WORKLOAD_SPEC_INNER}'))")}}"
+    if ! aws sagemaker create-ai-workload-config \
+        --ai-workload-config-name "${WORKLOAD_CONFIG_NAME}" \
+        --ai-workload-configs "${WORKLOAD_SPEC_OUTER}" \
+        --region "${AWS_REGION}" 2>&1 | grep -q "AIWorkloadConfigArn"; then
+        echo "❌ Failed to create workload config: ${WORKLOAD_CONFIG_NAME}"
+        echo "   Check that the execution role has sagemaker:CreateAIWorkloadConfig permission."
+        # Show actual error
+        aws sagemaker create-ai-workload-config \
+            --ai-workload-config-name "${WORKLOAD_CONFIG_NAME}" \
+            --ai-workload-configs "${WORKLOAD_SPEC_OUTER}" \
+            --region "${AWS_REGION}" 2>&1 || true
+        exit 1
+    fi
+    echo "   ✅ Workload config created"
+    # Step 2: Build compute spec (instance types, max 3)
+    COMPUTE_SPEC_JSON=""
     IFS=',' read -ra TYPES <<< "${INSTANCE_TYPES}"
+    INSTANCE_LIST=""
+    COUNT=0
     for itype in "${TYPES[@]}"; do
         itype=$(echo "${itype}" | xargs)  # trim whitespace
-        if [ "${FIRST}" = true ]; then
-            INSTANCE_TYPES_JSON="${INSTANCE_TYPES_JSON}\"${itype}\""
-            FIRST=false
+        if [ ${COUNT} -ge 3 ]; then
+            echo "   ⚠️  Max 3 instance types supported — truncating"
+            break
+        fi
+        if [ -n "${INSTANCE_LIST}" ]; then
+            INSTANCE_LIST="${INSTANCE_LIST},\"${itype}\""
         else
-            INSTANCE_TYPES_JSON="${INSTANCE_TYPES_JSON},\"${itype}\""
+            INSTANCE_LIST="\"${itype}\""
         fi
+        COUNT=$((COUNT + 1))
     done
-    INSTANCE_TYPES_JSON="${INSTANCE_TYPES_JSON}]"
+    COMPUTE_SPEC_JSON="InstanceTypes=[${INSTANCE_LIST}]"
+    # Step 3: Map goal to performance target metric
+    case "${GOAL}" in
+        latency) PERF_METRIC="ttft-ms" ;;
+        throughput) PERF_METRIC="throughput" ;;
+        cost) PERF_METRIC="cost" ;;
+    esac
-    # Build job input config
-    # The model is specified as either a HuggingFace model ID or S3 path
-    MODEL_SOURCE_JSON=""
+    # Step 4: Determine model source
+    # The recommendation API requires model artifacts as s3:// or https:// URI.
+    MODEL_SOURCE_ARG=""
     if [[ "${MODEL_NAME}" == s3://* ]]; then
-        MODEL_SOURCE_JSON="{\"S3DataSource\":{\"S3Uri\":\"${MODEL_NAME}\"}}"
+        MODEL_SOURCE_ARG="S3={S3Uri=${MODEL_NAME}}"
     else
-        MODEL_SOURCE_JSON="{\"ModelName\":\"${MODEL_NAME}\"}"
+        # HuggingFace model — use the HTTPS URL for the model on HuggingFace Hub
+        MODEL_SOURCE_ARG="S3={S3Uri=https://huggingface.co/${MODEL_NAME}}"
     fi
-    # Build workload config
-    WORKLOAD_JSON="{\"Concurrency\":${CONCURRENCY},\"InputTokens\":${INPUT_TOKENS},\"OutputTokens\":${OUTPUT_TOKENS}}"
-    # Build the full job specification
-    JOB_INPUT="{\"ModelSource\":${MODEL_SOURCE_JSON},\"Workload\":${WORKLOAD_JSON},\"InstanceTypes\":${INSTANCE_TYPES_JSON},\"OptimizationGoal\":\"${GOAL}\"}"
-    if ! aws sagemaker create-ai-recommendation-job \
-        --job-name "${OPTIMIZE_JOB_NAME}" \
-        --role-arn "${ROLE_ARN}" \
-        --input-config "${JOB_INPUT}" \
-        --region "${AWS_REGION}"; then
+    # Step 5: Create the recommendation job
+    S3_OUTPUT="s3://${TUNE_S3_BUCKET:-mlcc-tune-$(aws sts get-caller-identity --query Account --output text 2>/dev/null)-${AWS_REGION}}/${PROJECT_NAME}/optimize/"
+    RECOMMEND_CMD=(
+        aws sagemaker create-ai-recommendation-job
+        --ai-recommendation-job-name "${OPTIMIZE_JOB_NAME}"
+        --model-source "${MODEL_SOURCE_ARG}"
+        --output-config "S3OutputLocation=${S3_OUTPUT}"
+        --ai-workload-config-identifier "${WORKLOAD_CONFIG_NAME}"
+        --performance-target "Constraints=[{Metric=${PERF_METRIC}}]"
+        --role-arn "${ROLE_ARN}"
+        --compute-spec "${COMPUTE_SPEC_JSON}"
+        --inference-specification "Framework=VLLM"
+        --region "${AWS_REGION}"
+    )
+    if ! "${RECOMMEND_CMD[@]}" 2>&1; then
+        echo ""
         echo "❌ Failed to create AI Recommendation Job"
         echo "   Check that:"
         echo "   • The execution role has sagemaker:CreateAIRecommendationJob permission"
@@ -232,7 +298,7 @@ fi
 POLL_INTERVAL=30
 MAX_POLL_ATTEMPTS=120  # 60 minutes max (120 * 30s)
-if [ "${JOB_STATUS:-}" != "COMPLETED" ] && [ "${JOB_STATUS:-}" != "FAILED" ] && [ "${JOB_STATUS:-}" != "STOPPED" ]; then
+if [ "${JOB_STATUS:-}" != "COMPLETED" ] && [ "${JOB_STATUS:-}" != "FAILED" ] && [ "${JOB_STATUS:-}" != "STOPPED" ] && [ "${JOB_STATUS:-}" != "Completed" ] && [ "${JOB_STATUS:-}" != "Failed" ] && [ "${JOB_STATUS:-}" != "Stopped" ]; then
 echo "⏳ Waiting for recommendation job to complete..."
 echo "   Polling every ${POLL_INTERVAL}s (max ${MAX_POLL_ATTEMPTS} attempts = 60 min)"
@@ -243,27 +309,30 @@ JOB_STATUS=""
 while [ ${POLL_COUNT} -lt ${MAX_POLL_ATTEMPTS} ]; do
     JOB_STATUS=$(aws sagemaker describe-ai-recommendation-job \
-        --job-name "${OPTIMIZE_JOB_NAME}" \
+        --ai-recommendation-job-name "${OPTIMIZE_JOB_NAME}" \
         --region "${AWS_REGION}" \
-        --query 'Status' \
+        --query 'AIRecommendationJobStatus' \
         --output text 2>/dev/null) || {
         echo "⚠️  Failed to describe recommendation job (credentials may have expired)"
         echo "   Re-run to check status:"
-        echo "   aws sagemaker describe-ai-recommendation-job --job-name ${OPTIMIZE_JOB_NAME} --region ${AWS_REGION}"
+        echo "   aws sagemaker describe-ai-recommendation-job --ai-recommendation-job-name ${OPTIMIZE_JOB_NAME} --region ${AWS_REGION}"
         exit 1
     }
     case "${JOB_STATUS}" in
-        COMPLETED)
+        COMPLETED|Completed)
             echo "✅ Recommendation job completed!"
+            JOB_STATUS="COMPLETED"
             break
             ;;
-        FAILED)
+        FAILED|Failed)
             echo "❌ Recommendation job failed"
+            JOB_STATUS="FAILED"
             break
             ;;
-        STOPPED)
+        STOPPED|Stopped)
             echo "⚠️  Recommendation job was stopped"
+            JOB_STATUS="STOPPED"
             break
             ;;
         *)
@@ -292,7 +361,7 @@ if [ "${JOB_STATUS}" = "COMPLETED" ]; then
     # Get the full job description with results
     JOB_DESCRIPTION=$(aws sagemaker describe-ai-recommendation-job \
-        --job-name "${OPTIMIZE_JOB_NAME}" \
+        --ai-recommendation-job-name "${OPTIMIZE_JOB_NAME}" \
         --region "${AWS_REGION}" \
         --output json 2>/dev/null) || {
         echo "❌ Failed to fetch recommendation results"
@@ -503,7 +572,7 @@ except:
     if [ "${FAILURE_REASON}" = "unknown" ]; then
         FAILURE_REASON=$(aws sagemaker describe-ai-recommendation-job \
-            --job-name "${OPTIMIZE_JOB_NAME}" \
+            --ai-recommendation-job-name "${OPTIMIZE_JOB_NAME}" \
             --region "${AWS_REGION}" \
             --query 'FailureReason' \
             --output text 2>/dev/null) || FAILURE_REASON="unknown"
@@ -513,7 +582,7 @@ except:
     echo "   Reason: ${FAILURE_REASON}"
     echo ""
     echo "   Debug:"
-    echo "   aws sagemaker describe-ai-recommendation-job --job-name ${OPTIMIZE_JOB_NAME} --region ${AWS_REGION}"
+    echo "   aws sagemaker describe-ai-recommendation-job --ai-recommendation-job-name ${OPTIMIZE_JOB_NAME} --region ${AWS_REGION}"
 elif [ "${JOB_STATUS}" = "STOPPED" ]; then
     echo "⚠️  Recommendation job was stopped before completion"

package/templates/do/push CHANGED Viewed

@@ -9,6 +9,11 @@ set -o pipefail
 # Source configuration
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 source "${SCRIPT_DIR}/config"
+source "${SCRIPT_DIR}/lib/profile.sh"
+# ── Profile-resolved variables (env var > profile > default) ──────────────────
+ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
+export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
 echo "🚀 Pushing Docker image to Amazon ECR"
 echo "   Project: ${PROJECT_NAME}"