@aws/ml-container-creator 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/bin/cli.js +5 -2
  2. package/config/bootstrap-stack.json +86 -7
  3. package/config/defaults.json +1 -1
  4. package/infra/ci-harness/buildspec.yml +60 -0
  5. package/package.json +3 -1
  6. package/servers/README.md +41 -1
  7. package/servers/instance-sizer/index.js +42 -2
  8. package/servers/instance-sizer/lib/instance-ranker.js +114 -10
  9. package/servers/instance-sizer/lib/quota-resolver.js +368 -0
  10. package/servers/instance-sizer/package.json +2 -0
  11. package/servers/lib/catalogs/instances.json +527 -12
  12. package/servers/lib/catalogs/model-servers.json +15 -15
  13. package/servers/lib/catalogs/model-sizes.json +27 -0
  14. package/servers/lib/catalogs/models.json +71 -0
  15. package/servers/lib/schemas/image-catalog.schema.json +9 -1
  16. package/src/app.js +109 -3
  17. package/src/lib/bootstrap-command-handler.js +96 -3
  18. package/src/lib/cli-handler.js +2 -2
  19. package/src/lib/config-manager.js +117 -1
  20. package/src/lib/deployment-entry-schema.js +16 -0
  21. package/src/lib/prompt-runner.js +270 -12
  22. package/src/lib/prompts.js +288 -6
  23. package/src/lib/registry-command-handler.js +12 -0
  24. package/src/lib/schema-sync.js +31 -0
  25. package/src/lib/template-manager.js +49 -1
  26. package/src/lib/validate-runner.js +125 -2
  27. package/templates/Dockerfile +22 -2
  28. package/templates/code/cuda_compat.sh +22 -0
  29. package/templates/code/serve +3 -0
  30. package/templates/code/serving.properties +14 -0
  31. package/templates/code/start_server.sh +3 -0
  32. package/templates/diffusors/Dockerfile +2 -1
  33. package/templates/diffusors/serve +3 -0
  34. package/templates/do/README.md +33 -0
  35. package/templates/do/adapter +1214 -0
  36. package/templates/do/adapters/.gitkeep +2 -0
  37. package/templates/do/add-ic +130 -0
  38. package/templates/do/benchmark +718 -0
  39. package/templates/do/clean +593 -17
  40. package/templates/do/config +49 -4
  41. package/templates/do/deploy +513 -362
  42. package/templates/do/ic/default.conf +32 -0
  43. package/templates/do/lib/endpoint-config.sh +216 -0
  44. package/templates/do/lib/inference-component.sh +167 -0
  45. package/templates/do/lib/secrets.sh +44 -0
  46. package/templates/do/lib/wait.sh +131 -0
  47. package/templates/do/logs +107 -27
  48. package/templates/do/optimize +528 -0
  49. package/templates/do/register +119 -2
  50. package/templates/do/status +337 -0
  51. package/templates/do/test +80 -28
  52. package/templates/triton/Dockerfile +5 -0
@@ -89,6 +89,103 @@ case "${STATUS}" in
89
89
  ;;
90
90
  esac
91
91
 
92
+ # ============================================================
93
+ # Build IC list from do/ic/ directory (multi-IC support)
94
+ # ============================================================
95
+
96
+ <% if (deploymentTarget === 'realtime-inference') { %>
97
+ IC_LIST_JSON="[]"
98
+ if [ -d "${SCRIPT_DIR}/ic" ]; then
99
+ # Build IC list from all conf files (alphabetical order)
100
+ IC_ENTRIES=""
101
+ IC_COUNT=0
102
+ for conf in "${SCRIPT_DIR}"/ic/*.conf; do
103
+ [ -f "${conf}" ] || continue
104
+
105
+ # Source the IC config to get its variables
106
+ (
107
+ # Subshell to avoid polluting current environment
108
+ source "${conf}" 2>/dev/null
109
+ echo "${IC_DEPLOYED_NAME:-}|${IC_IMAGE_TAG:-}|${IC_GPU_COUNT:-1}|${IC_COPY_COUNT:-1}"
110
+ ) > /tmp/ic_entry_$$ 2>/dev/null
111
+
112
+ IC_ENTRY=$(cat /tmp/ic_entry_$$ 2>/dev/null || echo "|||")
113
+ rm -f /tmp/ic_entry_$$
114
+
115
+ IC_BASENAME=$(basename "${conf}" .conf)
116
+ IC_ENTRY_IMAGE=$(echo "${IC_ENTRY}" | cut -d'|' -f2)
117
+ IC_ENTRY_GPU=$(echo "${IC_ENTRY}" | cut -d'|' -f3)
118
+ IC_ENTRY_COPY=$(echo "${IC_ENTRY}" | cut -d'|' -f4)
119
+
120
+ if [ -n "${IC_ENTRIES}" ]; then
121
+ IC_ENTRIES="${IC_ENTRIES},"
122
+ fi
123
+ IC_ENTRIES="${IC_ENTRIES}{\"name\":\"${IC_BASENAME}\",\"image\":\"${IC_ENTRY_IMAGE}\",\"gpuCount\":${IC_ENTRY_GPU:-1},\"copyCount\":${IC_ENTRY_COPY:-1}}"
124
+ IC_COUNT=$((IC_COUNT + 1))
125
+ done
126
+
127
+ if [ "${CI_MODE}" = true ] && [ ${IC_COUNT} -gt 1 ]; then
128
+ # CI mode: only include the first IC (alphabetically) to keep CI costs down
129
+ FIRST_CONF=$(ls "${SCRIPT_DIR}"/ic/*.conf 2>/dev/null | head -1)
130
+ if [ -n "${FIRST_CONF}" ]; then
131
+ (
132
+ source "${FIRST_CONF}" 2>/dev/null
133
+ echo "${IC_DEPLOYED_NAME:-}|${IC_IMAGE_TAG:-}|${IC_GPU_COUNT:-1}|${IC_COPY_COUNT:-1}"
134
+ ) > /tmp/ic_first_$$ 2>/dev/null
135
+
136
+ FIRST_ENTRY=$(cat /tmp/ic_first_$$ 2>/dev/null || echo "|||")
137
+ rm -f /tmp/ic_first_$$
138
+
139
+ FIRST_BASENAME=$(basename "${FIRST_CONF}" .conf)
140
+ FIRST_IMAGE=$(echo "${FIRST_ENTRY}" | cut -d'|' -f2)
141
+ FIRST_GPU=$(echo "${FIRST_ENTRY}" | cut -d'|' -f3)
142
+ FIRST_COPY=$(echo "${FIRST_ENTRY}" | cut -d'|' -f4)
143
+
144
+ IC_LIST_JSON="[{\"name\":\"${FIRST_BASENAME}\",\"image\":\"${FIRST_IMAGE}\",\"gpuCount\":${FIRST_GPU:-1},\"copyCount\":${FIRST_COPY:-1}}]"
145
+ fi
146
+ else
147
+ IC_LIST_JSON="[${IC_ENTRIES}]"
148
+ fi
149
+ else
150
+ # Legacy: single IC from do/config
151
+ IC_LIST_JSON="[{\"name\":\"default\",\"image\":\"${PROJECT_NAME}-latest\",\"gpuCount\":${IC_GPU_COUNT:-1},\"copyCount\":${IC_COPY_COUNT:-1}}]"
152
+ fi
153
+
154
+ # Append adapter entries from do/adapters/*.conf
155
+ ADAPTER_COUNT=0
156
+ if [ -d "${SCRIPT_DIR}/adapters" ]; then
157
+ ADAPTER_ENTRIES=""
158
+ for conf in "${SCRIPT_DIR}"/adapters/*.conf; do
159
+ [ -f "${conf}" ] || continue
160
+ [[ "$(basename "${conf}")" == ".gitkeep" ]] && continue
161
+
162
+ ADAPTER_NAME_VAL=""
163
+ ADAPTER_WEIGHTS_VAL=""
164
+ ADAPTER_IC_VAL=""
165
+ eval "$(grep '^export ADAPTER_NAME=' "${conf}" 2>/dev/null)"
166
+ eval "$(grep '^export ADAPTER_WEIGHTS_URI=' "${conf}" 2>/dev/null)"
167
+ eval "$(grep '^export ADAPTER_IC_NAME=' "${conf}" 2>/dev/null)"
168
+ ADAPTER_NAME_VAL="${ADAPTER_NAME:-$(basename "${conf}" .conf)}"
169
+ ADAPTER_WEIGHTS_VAL="${ADAPTER_WEIGHTS_URI:-}"
170
+ ADAPTER_IC_VAL="${ADAPTER_IC_NAME:-}"
171
+
172
+ if [ -n "${ADAPTER_ENTRIES}" ]; then
173
+ ADAPTER_ENTRIES="${ADAPTER_ENTRIES},"
174
+ fi
175
+ ADAPTER_ENTRIES="${ADAPTER_ENTRIES}{\"name\":\"${ADAPTER_NAME_VAL}\",\"isAdapter\":true,\"baseIcName\":\"${ADAPTER_IC_VAL}\",\"artifactUrl\":\"${ADAPTER_WEIGHTS_VAL}\",\"gpuCount\":0,\"copyCount\":1}"
176
+ ADAPTER_COUNT=$((ADAPTER_COUNT + 1))
177
+ unset ADAPTER_NAME ADAPTER_WEIGHTS_URI ADAPTER_IC_NAME
178
+ done
179
+
180
+ if [ -n "${ADAPTER_ENTRIES}" ] && [ "${IC_LIST_JSON}" != "[]" ]; then
181
+ # Append adapters to existing IC list
182
+ IC_LIST_JSON="${IC_LIST_JSON%]},${ADAPTER_ENTRIES}]"
183
+ elif [ -n "${ADAPTER_ENTRIES}" ]; then
184
+ IC_LIST_JSON="[${ADAPTER_ENTRIES}]"
185
+ fi
186
+ fi
187
+ <% } %>
188
+
92
189
  # ============================================================
93
190
  # Derive architecture and backend from DEPLOYMENT_CONFIG
94
191
  # ============================================================
@@ -293,7 +390,7 @@ echo ""
293
390
  # ============================================================
294
391
 
295
392
  compute_config_id() {
296
- local input="${DEPLOYMENT_CONFIG}:${MODEL_NAME:-none}:${INSTANCE_TYPE}:${AWS_REGION}:${DEPLOYMENT_TARGET}"
393
+ local input="${DEPLOYMENT_CONFIG}:${MODEL_NAME:-none}:${INSTANCE_TYPE}:${AWS_REGION}:${DEPLOYMENT_TARGET}:ic${IC_COUNT:-1}:adapt${ADAPTER_COUNT:-0}"
297
394
  # Use sha256sum (Linux) with fallback to shasum (macOS)
298
395
  if command -v sha256sum &> /dev/null; then
299
396
  echo -n "$input" | sha256sum | cut -c1-16
@@ -373,6 +470,9 @@ write_ci_record() {
373
470
  "modelWeight": ${IC_MODEL_WEIGHT}
374
471
  <% } %>
375
472
  },
473
+ <% } %>
474
+ <% if (deploymentTarget === 'realtime-inference') { %>
475
+ "icList": ${IC_LIST_JSON},
376
476
  <% } %>
377
477
  "parameters": ${PARAMETERS}
378
478
  }
@@ -393,6 +493,7 @@ CJEOF
393
493
 
394
494
  # Try put-item with condition (new record)
395
495
  if aws dynamodb put-item \
496
+ --region "${AWS_REGION}" \
396
497
  --table-name "${CI_TABLE_NAME}" \
397
498
  --item "{
398
499
  \"configId\": {\"S\": \"${config_id}\"},
@@ -412,6 +513,7 @@ CJEOF
412
513
  else
413
514
  # Record already exists — update it (reset testStatus, update configJson, preserve createdAt)
414
515
  if aws dynamodb update-item \
516
+ --region "${AWS_REGION}" \
415
517
  --table-name "${CI_TABLE_NAME}" \
416
518
  --key "{\"configId\": {\"S\": \"${config_id}\"}}" \
417
519
  --update-expression "SET configJson = :cj, testStatus = :ts, deploymentConfig = :dc, baseImage = :bi, baseImageVersion = :bv, buildStrategy = :bs, projectName = :pn, schemaVersion = :sv" \
@@ -486,6 +588,9 @@ if [ "${JSON_OUTPUT}" = true ] || [ "${CI_MODE}" = true ]; then
486
588
  "modelWeight": ${IC_MODEL_WEIGHT}
487
589
  <% } %>
488
590
  },
591
+ <% } %>
592
+ <% if (deploymentTarget === 'realtime-inference') { %>
593
+ "icList": ${IC_LIST_JSON},
489
594
  <% } %>
490
595
  "parameters": ${PARAMETERS}
491
596
  }
@@ -496,6 +601,11 @@ DJEOF
496
601
  echo "${DEPLOYMENT_JSON}" | python3 -c "import sys,json; print(json.dumps(json.load(sys.stdin), indent=2))" 2>/dev/null || echo "${DEPLOYMENT_JSON}"
497
602
 
498
603
  if [ "${CI_MODE}" = true ]; then
604
+ # Strip capacity reservation ARN for CI — force on-demand deployment
605
+ # CI projects must never use reserved capacity (reservations are account-specific
606
+ # and time-bound; CI replay should always target on-demand instances)
607
+ unset CAPACITY_RESERVATION_ARN 2>/dev/null || true
608
+
499
609
  echo ""
500
610
  echo "⚠️ CI Integration is experimental and currently only tested for"
501
611
  echo " SageMaker Real-Time Inference endpoints."
@@ -507,7 +617,7 @@ DJEOF
507
617
  echo "🔑 configId: ${CONFIG_ID}"
508
618
 
509
619
  # Check if CI_Table exists before writing
510
- if ! aws dynamodb describe-table --table-name "${CI_TABLE_NAME}" &>/dev/null; then
620
+ if ! aws dynamodb describe-table --table-name "${CI_TABLE_NAME}" --region "${AWS_REGION}" &>/dev/null; then
511
621
  echo ""
512
622
  echo "⚠️ CI infrastructure not provisioned. Run 'ml-container-creator bootstrap' with CI enabled."
513
623
  echo " Skipping CI table write."
@@ -567,6 +677,13 @@ fi
567
677
  # Pass parameters as JSON string
568
678
  CMD_ARGS+=("--parameters" "${PARAMETERS}")
569
679
 
680
+ # Pass IC list as JSON string
681
+ <% if (deploymentTarget === 'realtime-inference') { %>
682
+ if [ "${IC_LIST_JSON}" != "[]" ]; then
683
+ CMD_ARGS+=("--ic-list" "${IC_LIST_JSON}")
684
+ fi
685
+ <% } %>
686
+
570
687
  # Pass generator version from package.json if available
571
688
  GENERATOR_VERSION=""
572
689
  if command -v node &> /dev/null; then
@@ -0,0 +1,337 @@
1
+ #!/bin/bash
2
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ set -e
6
+ set -u
7
+ set -o pipefail
8
+
9
+ # Source configuration
10
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
11
+ source "${SCRIPT_DIR}/config"
12
+
13
+ # ============================================================
14
+ # SageMaker Real-Time Inference Status
15
+ # ============================================================
16
+
17
+ # Validate AWS credentials
18
+ if ! aws sts get-caller-identity &> /dev/null; then
19
+ echo "❌ AWS credentials not configured"
20
+ echo " Run: aws configure"
21
+ exit 4
22
+ fi
23
+
24
+ # Check that we have an endpoint to query
25
+ if [ -z "${ENDPOINT_NAME:-}" ]; then
26
+ echo "❌ No endpoint configured"
27
+ echo " Run ./do/deploy first to create an endpoint."
28
+ exit 1
29
+ fi
30
+
31
+ # ============================================================
32
+ # Describe Endpoint
33
+ # ============================================================
34
+ ENDPOINT_JSON=$(aws sagemaker describe-endpoint \
35
+ --endpoint-name "${ENDPOINT_NAME}" \
36
+ --region "${AWS_REGION}" 2>/dev/null) || {
37
+ echo "❌ Endpoint not found: ${ENDPOINT_NAME}"
38
+ echo " The endpoint may have been deleted. Run ./do/deploy to create a new one."
39
+ exit 1
40
+ }
41
+
42
+ EP_STATUS=$(echo "${ENDPOINT_JSON}" | grep -o '"EndpointStatus":"[^"]*"' | head -1 | cut -d'"' -f4)
43
+ EP_INSTANCE_TYPE=$(echo "${ENDPOINT_JSON}" | grep -o '"InstanceType":"[^"]*"' | head -1 | cut -d'"' -f4)
44
+ EP_INSTANCE_COUNT=$(echo "${ENDPOINT_JSON}" | grep -o '"CurrentInstanceCount":[0-9]*' | head -1 | cut -d':' -f2)
45
+
46
+ # Fallback for instance count if not available
47
+ if [ -z "${EP_INSTANCE_COUNT}" ]; then
48
+ EP_INSTANCE_COUNT=$(echo "${ENDPOINT_JSON}" | grep -o '"InitialInstanceCount":[0-9]*' | head -1 | cut -d':' -f2)
49
+ fi
50
+ EP_INSTANCE_COUNT="${EP_INSTANCE_COUNT:-1}"
51
+
52
+ # Use INSTANCE_TYPE from config as fallback if not in describe response
53
+ EP_INSTANCE_TYPE="${EP_INSTANCE_TYPE:-${INSTANCE_TYPE:-unknown}}"
54
+
55
+ # GPU count lookup for the instance type
56
#######################################
# Map a SageMaker ML instance type to the number of GPUs it provides.
# Arguments: $1 - instance type (e.g. "ml.g5.12xlarge")
# Outputs:   GPU count to stdout; empty string when the type is unknown
#            (callers treat empty as "GPU count unavailable")
# Returns:   0 always
#######################################
_get_instance_gpus() {
  local instance_type="$1"
  case "${instance_type}" in
    # Single-GPU sizes
    ml.g4dn.xlarge | \
    ml.g5.xlarge | ml.g5.2xlarge | ml.g5.4xlarge | ml.g5.8xlarge | \
    ml.g6.xlarge | \
    ml.g6e.xlarge | ml.g6e.2xlarge | ml.g6e.4xlarge | ml.g6e.8xlarge | \
    ml.g7e.xlarge | ml.g7e.2xlarge | ml.g7e.4xlarge | ml.g7e.8xlarge | \
    ml.p3.2xlarge)
      echo 1
      ;;
    # Four-GPU sizes
    ml.g4dn.12xlarge | ml.g5.12xlarge | ml.g6.12xlarge | \
    ml.g6e.12xlarge | ml.g7e.12xlarge | ml.p3.8xlarge)
      echo 4
      ;;
    # Eight-GPU sizes
    ml.g5.48xlarge | ml.g6.48xlarge | ml.g6e.48xlarge | ml.g7e.48xlarge | \
    ml.p3.16xlarge | ml.p4d.24xlarge | ml.p4de.24xlarge | ml.p5.48xlarge)
      echo 8
      ;;
    # Unknown type: emit empty so the caller can skip GPU math
    *)
      echo ""
      ;;
  esac
}
91
+
92
+ INSTANCE_GPUS=$(_get_instance_gpus "${EP_INSTANCE_TYPE}")
93
+ TOTAL_GPUS=""
94
+ if [ -n "${INSTANCE_GPUS}" ]; then
95
+ TOTAL_GPUS=$(( INSTANCE_GPUS * EP_INSTANCE_COUNT ))
96
+ fi
97
+
98
+ # ============================================================
99
+ # Detect Instance Pools
100
+ # ============================================================
101
+ HAS_INSTANCE_POOLS=false
102
+ if echo "${ENDPOINT_JSON}" | grep -q '"InstancePools"'; then
103
+ HAS_INSTANCE_POOLS=true
104
+ fi
105
+
106
+ # ============================================================
107
+ # Print Endpoint Status
108
+ # ============================================================
109
+ echo ""
110
+ if [ "${ENDPOINT_EXTERNAL:-false}" = "true" ]; then
111
+ echo "Endpoint: ${ENDPOINT_NAME} (external) [${EP_STATUS}]"
112
+ else
113
+ echo "Endpoint: ${ENDPOINT_NAME} [${EP_STATUS}]"
114
+ fi
115
+
116
+ if [ "${HAS_INSTANCE_POOLS}" = "true" ]; then
117
+ # Instance pools path: show per-pool information
118
+ echo "Instance Pools:"
119
+
120
+ # Extract pool entries from the DescribeEndpoint response
121
+ # Each pool has: InstanceType, Priority, and CurrentInstanceCount (from the running endpoint)
122
+ # Parse using grep/sed — pools are in ProductionVariants[0].InstancePools array
123
+ pool_entries=$(echo "${ENDPOINT_JSON}" | grep -oE '"InstancePools"\s*:\s*\[[^]]*\]' | head -1 | sed 's/"InstancePools"\s*:\s*//')
124
+
125
+ if [ -n "${pool_entries}" ]; then
126
+ # Extract individual pool objects
127
+ # Each pool: {"InstanceType":"ml.xxx","Priority":N} — may also have CurrentInstanceCount
128
+ pool_types=$(echo "${pool_entries}" | grep -oE '"InstanceType"\s*:\s*"[^"]+"' | sed 's/"InstanceType"\s*:\s*"//;s/"$//')
129
+ pool_priorities=$(echo "${pool_entries}" | grep -oE '"Priority"\s*:\s*[0-9]+' | sed 's/"Priority"\s*:\s*//')
130
+
131
+ # CurrentInstanceCount may appear per-pool in the response
132
+ # If not per-pool, fall back to the endpoint-level CurrentInstanceCount
133
+ pool_instance_counts=$(echo "${pool_entries}" | grep -oE '"CurrentInstanceCount"\s*:\s*[0-9]+' | sed 's/"CurrentInstanceCount"\s*:\s*//')
134
+
135
+ # Convert to arrays
136
+ IFS=$'\n' read -r -d '' -a types_arr <<< "${pool_types}" || true
137
+ IFS=$'\n' read -r -d '' -a priorities_arr <<< "${pool_priorities}" || true
138
+ IFS=$'\n' read -r -d '' -a counts_arr <<< "${pool_instance_counts}" || true
139
+
140
+ for i in "${!types_arr[@]}"; do
141
+ local_type="${types_arr[$i]}"
142
+ local_priority="${priorities_arr[$i]:-$((i+1))}"
143
+ local_count="${counts_arr[$i]:-0}"
144
+
145
+ # Mark pools with instances > 0 as active
146
+ if [ "${local_count}" -gt 0 ] 2>/dev/null; then
147
+ printf " Priority %s: %-20s (%s instances) ← active\n" "${local_priority}" "${local_type}" "${local_count}"
148
+ else
149
+ printf " Priority %s: %-20s (%s instances)\n" "${local_priority}" "${local_type}" "${local_count}"
150
+ fi
151
+ done
152
+ fi
153
+ else
154
+ # Standard single instance type path
155
+ if [ -n "${TOTAL_GPUS}" ]; then
156
+ echo "Instance: ${EP_INSTANCE_TYPE} (${EP_INSTANCE_COUNT} instance, ${TOTAL_GPUS} GPUs)"
157
+ else
158
+ echo "Instance: ${EP_INSTANCE_TYPE} (${EP_INSTANCE_COUNT} instance)"
159
+ fi
160
+ fi
161
+ echo ""
162
+
163
+ # ============================================================
164
+ # Describe Inference Components
165
+ # ============================================================
166
+ TOTAL_GPU_USED=0
167
+ IC_ROWS=""
168
+
169
+ if [ -d "${SCRIPT_DIR}/ic" ]; then
170
+ # Multi-IC path: iterate do/ic/*.conf files
171
+ # NOTE: Only base ICs in do/ic/ are counted toward GPU usage.
172
+ # Adapter ICs (do/adapters/*.conf) share the base IC's GPU resources
173
+ # and do not have their own ComputeResourceRequirements, so they are
174
+ # intentionally excluded from GPU capacity calculations.
175
+ HAS_ICS=false
176
+
177
+ for conf in "${SCRIPT_DIR}"/ic/*.conf; do
178
+ [ -f "${conf}" ] || continue
179
+ HAS_ICS=true
180
+
181
+ ic_basename=$(basename "${conf}" .conf)
182
+
183
+ # Read IC_DEPLOYED_NAME from the conf file
184
+ ic_deployed_name=""
185
+ if grep -q "^export IC_DEPLOYED_NAME=" "${conf}" 2>/dev/null; then
186
+ ic_deployed_name=$(grep "^export IC_DEPLOYED_NAME=" "${conf}" | sed 's/^export IC_DEPLOYED_NAME="//' | sed 's/"$//')
187
+ fi
188
+
189
+ if [ -z "${ic_deployed_name}" ]; then
190
+ IC_ROWS="${IC_ROWS}$(printf "%-18s %-12s %-6s %-6s" "${ic_basename}" "Not Deployed" "-" "-")\n"
191
+ continue
192
+ fi
193
+
194
+ # Call DescribeInferenceComponent
195
+ IC_JSON=$(aws sagemaker describe-inference-component \
196
+ --inference-component-name "${ic_deployed_name}" \
197
+ --region "${AWS_REGION}" 2>/dev/null) || {
198
+ IC_ROWS="${IC_ROWS}$(printf "%-18s %-12s %-6s %-6s" "${ic_basename}" "Not Found" "-" "-")\n"
199
+ continue
200
+ }
201
+
202
+ ic_status=$(echo "${IC_JSON}" | grep -o '"InferenceComponentStatus":"[^"]*"' | head -1 | cut -d'"' -f4)
203
+ ic_status="${ic_status:-Unknown}"
204
+
205
+ ic_gpu_count=$(echo "${IC_JSON}" | grep -o '"NumberOfAcceleratorDevicesRequired":[0-9]*' | head -1 | cut -d':' -f2)
206
+ ic_gpu_count="${ic_gpu_count:-0}"
207
+
208
+ ic_copy_count=$(echo "${IC_JSON}" | grep -o '"DesiredCopyCount":[0-9]*' | head -1 | cut -d':' -f2)
209
+ if [ -z "${ic_copy_count}" ]; then
210
+ ic_copy_count=$(echo "${IC_JSON}" | grep -o '"CurrentCopyCount":[0-9]*' | head -1 | cut -d':' -f2)
211
+ fi
212
+ ic_copy_count="${ic_copy_count:-1}"
213
+
214
+ TOTAL_GPU_USED=$(( TOTAL_GPU_USED + ic_gpu_count ))
215
+
216
+ IC_ROWS="${IC_ROWS}$(printf "%-18s %-12s %-6s %-6s" "${ic_basename}" "${ic_status}" "${ic_gpu_count}" "${ic_copy_count}")\n"
217
+ done
218
+
219
+ if [ "${HAS_ICS}" = true ]; then
220
+ echo "Inference Components:"
221
+ printf "%-18s %-12s %-6s %-6s\n" "NAME" "STATUS" "GPUs" "COPIES"
222
+ echo -e "${IC_ROWS}" | head -n -1
223
+ echo " ─────"
224
+ if [ -n "${TOTAL_GPUS}" ]; then
225
+ printf "Total GPU usage: %s/%s\n" "${TOTAL_GPU_USED}" "${TOTAL_GPUS}"
226
+ else
227
+ printf "Total GPU usage: %s\n" "${TOTAL_GPU_USED}"
228
+ fi
229
+ else
230
+ echo "No IC config files found in do/ic/"
231
+ fi
232
+ else
233
+ # Legacy single-IC path: use INFERENCE_COMPONENT_NAME from config
234
+ ic_name="${INFERENCE_COMPONENT_NAME:-}"
235
+
236
+ if [ -z "${ic_name}" ]; then
237
+ echo "No inference component deployed."
238
+ echo "Run ./do/deploy to create one."
239
+ else
240
+ IC_JSON=$(aws sagemaker describe-inference-component \
241
+ --inference-component-name "${ic_name}" \
242
+ --region "${AWS_REGION}" 2>/dev/null) || {
243
+ echo "Inference Components:"
244
+ printf "%-18s %-12s %-6s %-6s\n" "NAME" "STATUS" "GPUs" "COPIES"
245
+ printf "%-18s %-12s %-6s %-6s\n" "default" "Not Found" "-" "-"
246
+ echo ""
247
+ exit 0
248
+ }
249
+
250
+ ic_status=$(echo "${IC_JSON}" | grep -o '"InferenceComponentStatus":"[^"]*"' | head -1 | cut -d'"' -f4)
251
+ ic_status="${ic_status:-Unknown}"
252
+
253
+ ic_gpu_count=$(echo "${IC_JSON}" | grep -o '"NumberOfAcceleratorDevicesRequired":[0-9]*' | head -1 | cut -d':' -f2)
254
+ ic_gpu_count="${ic_gpu_count:-0}"
255
+
256
+ ic_copy_count=$(echo "${IC_JSON}" | grep -o '"DesiredCopyCount":[0-9]*' | head -1 | cut -d':' -f2)
257
+ if [ -z "${ic_copy_count}" ]; then
258
+ ic_copy_count=$(echo "${IC_JSON}" | grep -o '"CurrentCopyCount":[0-9]*' | head -1 | cut -d':' -f2)
259
+ fi
260
+ ic_copy_count="${ic_copy_count:-1}"
261
+
262
+ TOTAL_GPU_USED=$(( TOTAL_GPU_USED + ic_gpu_count ))
263
+
264
+ echo "Inference Components:"
265
+ printf "%-18s %-12s %-6s %-6s\n" "NAME" "STATUS" "GPUs" "COPIES"
266
+ printf "%-18s %-12s %-6s %-6s\n" "default" "${ic_status}" "${ic_gpu_count}" "${ic_copy_count}"
267
+ echo " ─────"
268
+ if [ -n "${TOTAL_GPUS}" ]; then
269
+ printf "Total GPU usage: %s/%s\n" "${TOTAL_GPU_USED}" "${TOTAL_GPUS}"
270
+ else
271
+ printf "Total GPU usage: %s\n" "${TOTAL_GPU_USED}"
272
+ fi
273
+ fi
274
+ fi
275
+
276
+ echo ""
277
+
278
+ <% if (typeof enableLora !== 'undefined' && enableLora) { %>
279
+ # ============================================================
280
+ # Describe LoRA Adapters
281
+ # ============================================================
282
+ if [ "${ENABLE_LORA:-}" = "true" ]; then
283
+ # List all inference components on the endpoint
284
+ ADAPTER_IC_LIST=$(aws sagemaker list-inference-components \
285
+ --endpoint-name-equals "${ENDPOINT_NAME}" \
286
+ --region "${AWS_REGION}" 2>/dev/null) || ADAPTER_IC_LIST=""
287
+
288
+ if [ -n "${ADAPTER_IC_LIST}" ]; then
289
+ # Extract IC names
290
+ ADAPTER_IC_NAMES=$(echo "${ADAPTER_IC_LIST}" | jq -r '.InferenceComponents[].InferenceComponentName' 2>/dev/null)
291
+
292
+ # Filter to adapter ICs (those with BaseInferenceComponentName) and collect details
293
+ ADAPTER_ROWS=""
294
+ ADAPTER_COUNT=0
295
+
296
+ for ic_name in ${ADAPTER_IC_NAMES}; do
297
+ # Describe each IC to check if it's an adapter
298
+ ic_detail=$(aws sagemaker describe-inference-component \
299
+ --inference-component-name "${ic_name}" \
300
+ --region "${AWS_REGION}" 2>/dev/null) || continue
301
+
302
+ # Check if this IC has a BaseInferenceComponentName (adapter IC)
303
+ base_ic=$(echo "${ic_detail}" | jq -r '.Specification.BaseInferenceComponentName // empty' 2>/dev/null)
304
+
305
+ if [ -z "${base_ic}" ]; then
306
+ # Not an adapter IC — skip
307
+ continue
308
+ fi
309
+
310
+ # Extract status and artifact URL
311
+ adapter_status=$(echo "${ic_detail}" | jq -r '.InferenceComponentStatus // "Unknown"' 2>/dev/null)
312
+ adapter_weights=$(echo "${ic_detail}" | jq -r '.Specification.Container.ArtifactUrl // "N/A"' 2>/dev/null)
313
+
314
+ # Derive display name (strip project prefix if present)
315
+ display_name="${ic_name}"
316
+ if [[ "${ic_name}" == "${PROJECT_NAME}-adapter-"* ]]; then
317
+ display_name="${ic_name#${PROJECT_NAME}-adapter-}"
318
+ fi
319
+
320
+ ADAPTER_ROWS="${ADAPTER_ROWS}$(printf '%-14s%-12s%s' "${display_name}" "${adapter_status}" "${adapter_weights}")\n"
321
+ ADAPTER_COUNT=$((ADAPTER_COUNT + 1))
322
+ done
323
+
324
+ echo "Adapters (LoRA): [max: <%= maxLoras %> GPU / 70 CPU]"
325
+ if [ "${ADAPTER_COUNT}" -eq 0 ]; then
326
+ echo " No adapters deployed"
327
+ else
328
+ printf '%-14s%-12s%s\n' "NAME" "STATUS" "WEIGHTS"
329
+ echo -e "${ADAPTER_ROWS}" | head -n -1
330
+ fi
331
+ else
332
+ echo "Adapters (LoRA): [max: <%= maxLoras %> GPU / 70 CPU]"
333
+ echo " No adapters deployed"
334
+ fi
335
+ echo ""
336
+ fi
337
+ <% } %>
package/templates/do/test CHANGED
@@ -15,10 +15,11 @@ source "${SCRIPT_DIR}/config"
15
15
  # SageMaker Real-Time Inference Testing
16
16
  # ============================================================
17
17
 
18
- # Parse arguments
19
- ENDPOINT_NAME="${1:-${ENDPOINT_NAME:-}}"
18
+ # Parse arguments: ./do/test [<ic-name>]
19
+ IC_ARG="${1:-}"
20
20
 
21
- if [ -z "${ENDPOINT_NAME}" ]; then
21
+ # Determine test mode based on ENDPOINT_NAME in config
22
+ if [ -z "${ENDPOINT_NAME:-}" ]; then
22
23
  echo "🧪 Testing local container at localhost:8080"
23
24
  echo " Project: ${PROJECT_NAME}"
24
25
  echo " Framework: ${FRAMEWORK}"
@@ -210,8 +211,53 @@ else
210
211
  # Create temporary file for response
211
212
  TEMP_RESPONSE=$(mktemp)
212
213
 
213
- # Invoke endpoint via inference component
214
- IC_NAME="${INFERENCE_COMPONENT_NAME:-}"
214
+ # Resolve inference component name
215
+ # Precedence: do/adapters/ → do/ic/ → legacy config
216
+ IC_NAME=""
217
+ if [ -n "${IC_ARG}" ] && [ -f "${SCRIPT_DIR}/adapters/${IC_ARG}.conf" ]; then
218
+ # Argument matches an adapter name — use adapter IC
219
+ ADAPTER_IC_NAME=""
220
+ source "${SCRIPT_DIR}/adapters/${IC_ARG}.conf"
221
+ if [ -z "${ADAPTER_IC_NAME}" ]; then
222
+ echo "❌ Adapter '${IC_ARG}' conf is missing ADAPTER_IC_NAME."
223
+ exit 1
224
+ fi
225
+ IC_NAME="${ADAPTER_IC_NAME}"
226
+ elif [ -n "${IC_ARG}" ]; then
227
+ # Explicit IC name provided as argument
228
+ IC_CONF="${SCRIPT_DIR}/ic/${IC_ARG}.conf"
229
+ if [ ! -f "${IC_CONF}" ]; then
230
+ echo "❌ IC config not found: do/ic/${IC_ARG}.conf"
231
+ exit 1
232
+ fi
233
+ IC_DEPLOYED_NAME=""
234
+ source "${IC_CONF}"
235
+ if [ -z "${IC_DEPLOYED_NAME}" ]; then
236
+ echo "❌ IC '${IC_ARG}' has not been deployed yet. Run ./do/deploy --ic ${IC_ARG} first."
237
+ exit 1
238
+ fi
239
+ IC_NAME="${IC_DEPLOYED_NAME}"
240
+ elif [ -d "${SCRIPT_DIR}/ic" ]; then
241
+ # No argument, but do/ic/ exists — use first IC alphabetically
242
+ IC_NAME=""
243
+ for conf in "${SCRIPT_DIR}"/ic/*.conf; do
244
+ [ -f "${conf}" ] || continue
245
+ IC_DEPLOYED_NAME=""
246
+ source "${conf}"
247
+ if [ -n "${IC_DEPLOYED_NAME}" ]; then
248
+ IC_NAME="${IC_DEPLOYED_NAME}"
249
+ break
250
+ fi
251
+ done
252
+ if [ -z "${IC_NAME}" ]; then
253
+ echo "❌ No ICs deployed. Run ./do/deploy first."
254
+ exit 1
255
+ fi
256
+ else
257
+ # Legacy: no do/ic/ directory, use INFERENCE_COMPONENT_NAME from do/config
258
+ IC_NAME="${INFERENCE_COMPONENT_NAME:-}"
259
+ fi
260
+
215
261
  INVOKE_ARGS=(
216
262
  --endpoint-name "${ENDPOINT_NAME}"
217
263
  --region "${AWS_REGION}"
@@ -285,12 +331,15 @@ if [ "${TEST_MODE}" = "local" ]; then
285
331
  echo " • Push to ECR: ./do/push"
286
332
  echo " • Deploy to SageMaker: ./do/deploy"
287
333
  else
288
- echo "Endpoint is ready for production use!"
289
- echo " • Endpoint name: ${ENDPOINT_NAME}"
290
- echo " Region: ${AWS_REGION}"
291
- echo ""
292
- echo "📝 Register this deployment:"
293
- echo " ./do/register"
334
+ echo "📋 What's next?"
335
+ <% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
336
+ echo " Benchmark performance: ./do/benchmark"
337
+ <% } %>
338
+ <% if (typeof enableLora !== 'undefined' && enableLora) { %>
339
+ echo " • Add a LoRA adapter: ./do/adapter add <name> --weights s3://..."
340
+ <% } %>
341
+ echo " • Register this deployment: ./do/register"
342
+ echo " • View logs: ./do/logs"
294
343
  fi
295
344
 
296
345
  <% } else if (deploymentTarget === 'async-inference') { %>
@@ -599,13 +648,13 @@ if [ "${TEST_MODE}" = "local" ]; then
599
648
  echo " • Push to ECR: ./do/push"
600
649
  echo " • Deploy to SageMaker: ./do/deploy"
601
650
  else
602
- echo "Async endpoint is ready for production use!"
603
- echo " • Endpoint name: ${ENDPOINT_NAME}"
604
- echo " Region: ${AWS_REGION}"
605
- echo " • S3 output: ${ASYNC_S3_OUTPUT_PATH}"
606
- echo ""
607
- echo "📝 Register this deployment:"
608
- echo " ./do/register"
651
+ echo "📋 What's next?"
652
+ <% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
653
+ echo " Benchmark performance: ./do/benchmark"
654
+ <% } %>
655
+ echo " • Check async output: aws s3 ls ${ASYNC_S3_OUTPUT_PATH}"
656
+ echo " Register this deployment: ./do/register"
657
+ echo " • View logs: ./do/logs"
609
658
  fi
610
659
 
611
660
  <% } else if (deploymentTarget === 'hyperpod-eks') { %>
@@ -864,13 +913,14 @@ if [ "${TEST_TARGET}" = "local" ]; then
864
913
  echo " • Push to ECR: ./do/push"
865
914
  echo " • Deploy to HyperPod: ./do/deploy"
866
915
  else
867
- echo "HyperPod deployment is ready for production use!"
868
- echo " • Cluster: ${HYPERPOD_CLUSTER_NAME}"
869
- echo " Namespace: ${HYPERPOD_NAMESPACE}"
870
- echo " • Service: ${PROJECT_NAME}"
871
- echo ""
872
- echo "📝 Register this deployment:"
873
- echo " ./do/register"
916
+ echo "📋 What's next?"
917
+ <% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
918
+ echo " Benchmark performance: ./do/benchmark"
919
+ <% } %>
920
+ echo " • Check pod status: kubectl get pods -n ${HYPERPOD_NAMESPACE}"
921
+ echo " View pod logs: kubectl logs -n ${HYPERPOD_NAMESPACE} -l app=${PROJECT_NAME}"
922
+ echo " • Register this deployment: ./do/register"
923
+ echo " • View logs: ./do/logs"
874
924
  fi
875
925
 
876
926
  <% } else if (deploymentTarget === 'batch-transform') { %>
@@ -950,8 +1000,10 @@ case "${TEST_TARGET}" in
950
1000
  echo ""
951
1001
  echo "✅ All tests passed!"
952
1002
  echo ""
953
- echo "📝 Register this deployment:"
954
- echo " ./do/register"
1003
+ echo "📋 What's next?"
1004
+ echo " • View results: cat batch-output/"
1005
+ echo " • Register this deployment: ./do/register"
1006
+ echo " • View logs: ./do/logs"
955
1007
  ;;
956
1008
  InProgress)
957
1009
  echo "⏳ Transform job is still in progress"
@@ -1144,4 +1196,4 @@ echo "Next steps:"
1144
1196
  echo " • Push to ECR: ./do/push"
1145
1197
  echo " • Deploy batch transform: ./do/deploy"
1146
1198
 
1147
- <% } %>
1199
+ <% } %>