npm - @aws/ml-container-creator - Version diff - 0.13.3 → 0.13.5 - Mend

@aws/ml-container-creator 0.13.3 → 0.13.5

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Files changed (43)

package/README.md +23 -5
package/infra/ci-harness/package-lock.json +1 -5
package/package.json +5 -3
package/pyproject.toml +21 -0
package/requirements.txt +19 -0
package/servers/instance-sizer/lib/model-resolver.js +127 -185
package/servers/instance-sizer/lib/vram-estimator.js +86 -0
package/servers/lib/catalogs/instances.json +0 -27
package/src/app.js +2 -0
package/src/lib/bootstrap-command-handler.js +35 -25
package/src/lib/generated/cli-options.js +1 -1
package/src/lib/generated/parameter-matrix.js +1 -1
package/src/lib/generated/validation-rules.js +1 -1
package/src/lib/prompt-runner.js +14 -31
package/templates/IAM_PERMISSIONS.md +64 -13
package/templates/do/.adapter_helper.py +451 -0
package/templates/do/.benchmark_writer.py +13 -0
package/templates/do/.stage_helper.py +419 -0
package/templates/do/.tune_helper.py +218 -67
package/templates/do/README.md +50 -604
package/templates/do/__pycache__/.adapter_helper.cpython-312.pyc +0 -0
package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
package/templates/do/__pycache__/.tune_helper.cpython-312.pyc +0 -0
package/templates/do/adapter +109 -4
package/templates/do/benchmark +150 -12
package/templates/do/build +2 -5
package/templates/do/clean.d/async-inference.ejs +2 -5
package/templates/do/clean.d/batch-transform.ejs +2 -5
package/templates/do/clean.d/hyperpod-eks.ejs +2 -5
package/templates/do/clean.d/managed-inference.ejs +2 -5
package/templates/do/config +4 -0
package/templates/do/deploy.d/async-inference.ejs +6 -9
package/templates/do/deploy.d/batch-transform.ejs +4 -7
package/templates/do/deploy.d/hyperpod-eks.ejs +1 -4
package/templates/do/deploy.d/managed-inference.ejs +15 -6
package/templates/do/lib/profile.sh +24 -15
package/templates/do/push +2 -5
package/templates/do/register +2 -5
package/templates/do/stage +114 -292
package/templates/do/submit +1 -4
package/templates/do/tune +64 -10
package/templates/MIGRATION.md +0 -488
package/templates/TEMPLATE_SYSTEM.md +0 -243

package/templates/do/adapter CHANGED Viewed

@@ -21,10 +21,7 @@ source "${SCRIPT_DIR}/config"
 source "${SCRIPT_DIR}/lib/profile.sh"
 # ── Profile-resolved variables (env var > profile > default) ──────────────────
-# Disable unbound-variable checking for associative array access (bash 3.2 compat)
-set +u
-ADAPTER_S3_BUCKET="${ADAPTER_S3_BUCKET:-mlcc-adapters-${_PROFILE[accountId]:-unknown}-${_PROFILE[awsRegion]:-us-east-1}}"
-set -u
+ADAPTER_S3_BUCKET="${ADAPTER_S3_BUCKET:-mlcc-adapters-${_PROFILE_accountId:-unknown}-${_PROFILE_awsRegion:-us-east-1}}"
 source "${SCRIPT_DIR}/lib/wait.sh"
@@ -46,12 +43,16 @@ _usage() {
     echo ""
     echo "Options:"
     echo "  --help, -h    Show this help message"
+    echo "  --local       Use local aws s3 cp instead of Processing Job (--from-tune)"
+    echo "  --no-wait     Submit Processing Job and return immediately (--from-tune)"
     echo ""
     echo "Examples:"
     echo "  ./do/adapter add ectsum --weights s3://my-bucket/adapters/ectsum/adapter.tar.gz"
     echo "  ./do/adapter add ectsum --from-hub predibase/llama-3.1-8b-ectsum"
     echo "  ./do/adapter add tuned-sft --from-tune"
     echo "  ./do/adapter add tuned-sft --from-tune sft"
+    echo "  ./do/adapter add tuned-sft --from-tune --local"
+    echo "  ./do/adapter add tuned-sft --from-tune --no-wait"
     echo "  ./do/adapter list"
     echo "  ./do/adapter remove ectsum"
     echo "  ./do/adapter update ectsum --weights s3://my-bucket/adapters/ectsum-v2/adapter.tar.gz"
@@ -370,6 +371,8 @@ _adapter_add() {
     local from_hub=""
     local from_tune=""
     local from_tune_technique=""
+    local use_local=""
+    local no_wait=""
     # Parse add arguments
     shift  # remove 'add' from args
@@ -403,6 +406,14 @@ _adapter_add() {
                     shift
                 fi
                 ;;
+            --local)
+                use_local="true"
+                shift
+                ;;
+            --no-wait)
+                no_wait="true"
+                shift
+                ;;
             --help|-h)
                 echo "Usage: ./do/adapter add <name> --weights <s3-uri>"
                 echo "       ./do/adapter add <name> --from-hub <hf-repo-id>"
@@ -417,6 +428,8 @@ _adapter_add() {
                 echo "  --from-tune [technique]     Use adapter output from do/tune"
                 echo "                              Without technique: uses latest tune output"
                 echo "                              With technique (e.g., sft, dpo): uses technique-specific output"
+                echo "  --local                     Use local aws s3 cp instead of Processing Job (--from-tune only)"
+                echo "  --no-wait                   Submit Processing Job and return immediately (--from-tune only)"
                 echo ""
                 echo "Note: --weights, --from-hub, and --from-tune are mutually exclusive."
                 echo ""
@@ -425,6 +438,8 @@ _adapter_add() {
                 echo "  ./do/adapter add ectsum --from-hub predibase/llama-3.1-8b-ectsum"
                 echo "  ./do/adapter add tuned-sft --from-tune"
                 echo "  ./do/adapter add tuned-sft --from-tune sft"
+                echo "  ./do/adapter add tuned-sft --from-tune --local"
+                echo "  ./do/adapter add tuned-sft --from-tune --no-wait"
                 exit 0
                 ;;
             -*)
@@ -532,6 +547,95 @@ _adapter_add() {
         fi
         echo ""
+        # ── Route to Processing Job helper (default) or local path ────────
+        if [ -z "${use_local}" ]; then
+            # Default: use Processing Job via .adapter_helper.py
+            echo "🚀 Submitting Processing Job to stage adapter..."
+            echo ""
+            # Resolve execution role
+            local exec_role="${EXECUTION_ROLE_ARN:-}"
+            if [ -z "${exec_role}" ]; then
+                exec_role="${ROLE_ARN:-}"
+            fi
+            if [ -z "${exec_role}" ]; then
+                exec_role="${SAGEMAKER_ROLE_ARN:-}"
+            fi
+            if [ -z "${exec_role}" ]; then
+                echo "❌ No execution role found."
+                echo ""
+                echo "   Run 'ml-container-creator bootstrap' to set up your profile,"
+                echo "   or set ROLE_ARN / EXECUTION_ROLE_ARN in do/config."
+                exit 1
+            fi
+            # Resolve S3 bucket
+            local adapter_bucket="${ADAPTER_S3_BUCKET:-}"
+            if [ -z "${adapter_bucket}" ]; then
+                local account_id
+                account_id=$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo "")
+                if [ -z "${account_id}" ]; then
+                    echo "❌ Could not determine AWS account ID."
+                    echo "   Ensure AWS credentials are configured."
+                    exit 4
+                fi
+                adapter_bucket="mlcc-adapters-${account_id}-${AWS_REGION}"
+            fi
+            # Build helper args
+            local helper_args=(
+                "stage-from-tune"
+                "--training-output-s3-uri" "${weights_uri}"
+                "--adapter-name" "${adapter_name}"
+                "--bucket" "${adapter_bucket}"
+                "--project" "${PROJECT_NAME}"
+                "--role-arn" "${exec_role}"
+                "--region" "${AWS_REGION}"
+            )
+            if [ -n "${no_wait}" ]; then
+                helper_args+=("--no-wait")
+            fi
+            # Invoke the Python helper
+            local helper_output
+            if ! helper_output=$(python3 "${SCRIPT_DIR}/.adapter_helper.py" "${helper_args[@]}" 2>/dev/null); then
+                echo "❌ Processing Job failed. See error above."
+                exit 1
+            fi
+            # Parse JSON output from helper (extract only the JSON line, skip any log noise)
+            local json_line
+            json_line=$(echo "${helper_output}" | grep -E '^\{' | tail -1)
+            local job_status
+            job_status=$(echo "${json_line}" | python3 -c "import sys,json; print(json.loads(sys.stdin.read()).get('status',''))" 2>/dev/null || echo "")
+            if [ "${job_status}" = "Completed" ] || [ "${job_status}" = "InProgress" ]; then
+                echo "${json_line}"
+                # Extract adapter_s3_uri for downstream use
+                local staged_adapter_uri
+                staged_adapter_uri=$(echo "${json_line}" | python3 -c "import sys,json; print(json.loads(sys.stdin.read()).get('adapter_s3_uri',''))" 2>/dev/null || echo "")
+                if [ -n "${no_wait}" ]; then
+                    echo ""
+                    echo "✅ Processing Job submitted. Check status with:"
+                    echo "   python3 ${SCRIPT_DIR}/.adapter_helper.py status --job-name <job-name>"
+                    echo ""
+                    echo "   Once complete, re-run without --no-wait to register the adapter."
+                    exit 0
+                fi
+                # Update weights_uri to point to the staged adapter
+                weights_uri="${staged_adapter_uri}"
+                echo ""
+                echo "✅ Adapter staged via Processing Job: ${weights_uri}"
+            else
+                echo "❌ Unexpected status from Processing Job helper: ${job_status}"
+                echo "   Output: ${helper_output}"
+                exit 1
+            fi
+        else
+            # ── --local flag: Package tune artifacts locally (original behavior) ──
         # ── Package tune artifacts as tar.gz if needed ────────────────────
         # Tune output is an S3 path that may be:
         #   1. Already a tar.gz file (s3://...adapter.tar.gz) → use directly
@@ -680,6 +784,7 @@ _adapter_add() {
             weights_uri="${s3_tar_path}"
         fi
         echo ""
+        fi  # end --local else branch
     fi
     # ── Validate HF repo ID format (if --from-hub) ───────────────────────

package/templates/do/benchmark CHANGED Viewed

@@ -12,10 +12,12 @@ set -o pipefail
 # ── Source project configuration ──────────────────────────────────────────────
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 source "${SCRIPT_DIR}/config"
+source "${SCRIPT_DIR}/lib/profile.sh"
 # ── Parse flags ───────────────────────────────────────────────────────────────
 CLEAN_AFTER=false
 FORCE=false
+ARG_STATUS=false
 IC_ARG=""
 ADAPTER_ARG=""
 ARG_NO_STALE_WARNING=false
@@ -24,30 +26,33 @@ while [ $# -gt 0 ]; do
     case "$1" in
         --clean) CLEAN_AFTER=true; shift ;;
         --force) FORCE=true; shift ;;
+        --status) ARG_STATUS=true; shift ;;
         --no-stale-warning) ARG_NO_STALE_WARNING=true; shift ;;
         --workload) shift; ARG_WORKLOAD="${1:-}"; shift ;;
         --ic) shift; IC_ARG="${1:-}"; shift ;;
         --adapter) shift; ADAPTER_ARG="${1:-}"; shift ;;
         --help|-h)
-            echo "Usage: ./do/benchmark [--workload <name>] [--ic <name>] [--adapter <name>] [--force] [--clean] [--no-stale-warning]"
+            echo "Usage: ./do/benchmark [--workload <name>] [--status] [--ic <name>] [--adapter <name>] [--force] [--clean]"
             echo ""
             echo "Run SageMaker AI Benchmark against the deployed endpoint."
             echo ""
             echo "Options:"
+            echo "  --status            Check job status; if completed, download results + write to Athena"
             echo "  --ic <name>         Benchmark a specific inference component"
             echo "  --adapter <name>    Benchmark a specific LoRA adapter IC"
             echo "  --force             Create a new benchmark job even if one is already running"
             echo "  --clean             Delete workload config and benchmark job after displaying results"
             echo "  --no-stale-warning  Suppress schema registry staleness warning"
+            echo "  --no-stale-warning  Suppress schema registry staleness warning"
             echo ""
             echo "IC resolution:"
             echo "  --adapter <name> Use ADAPTER_IC_NAME from do/adapters/<name>.conf"
             echo "  --ic <name>      Use IC_DEPLOYED_NAME from do/ic/<name>.conf"
             echo "  (no flag)        Use first IC in do/ic/ alphabetically, or legacy config"
             echo ""
-            echo "Idempotency:"
-            echo "  If a benchmark job is already in progress, re-running without --force"
-            echo "  will resume waiting for the existing job and display its results."
+            echo "Status:"
+            echo "  After interrupting a running benchmark, use --status to check completion"
+            echo "  and trigger results download + Athena write."
             echo ""
             echo "Prerequisites:"
             echo "  • Endpoint must be deployed and InService (run ./do/deploy first)"
@@ -59,6 +64,112 @@ while [ $# -gt 0 ]; do
 done
+# ── Handle --status (early exit) ─────────────────────────────────────────────
+# Query the tracked benchmark job, display status, and if completed:
+# download results, display metrics, and write to Athena (if not already done).
+if [ "${ARG_STATUS}" = true ]; then
+    JOB_NAME="${BENCHMARK_JOB_NAME:-}"
+    if [ -z "${JOB_NAME}" ]; then
+        echo "❌ No benchmark job tracked"
+        echo "   Run ./do/benchmark --workload <name> to start one."
+        exit 1
+    fi
+    echo "📊 Benchmark Job Status"
+    echo ""
+    echo "   Job: ${JOB_NAME}"
+    STATUS=$(aws sagemaker describe-ai-benchmark-job \
+        --ai-benchmark-job-name "${JOB_NAME}" \
+        --region "${AWS_REGION}" \
+        --query 'AIBenchmarkJobStatus' \
+        --output text 2>/dev/null) || STATUS=""
+    if [ -z "${STATUS}" ]; then
+        echo "   Status: Unknown (job not found or credentials expired)"
+        exit 1
+    fi
+    echo "   Status: ${STATUS}"
+    case "${STATUS}" in
+        Completed)
+            # Check if results already exist locally
+            PROJECT_ROOT="${SCRIPT_DIR}/.."
+            LOCAL_RESULTS_DIR="${PROJECT_ROOT}/benchmarks/${JOB_NAME}"
+            RESULTS_JSONL=$(find "${LOCAL_RESULTS_DIR}" -name "profile_export.jsonl" -type f 2>/dev/null | head -1)
+            if [ -z "${RESULTS_JSONL}" ]; then
+                echo ""
+                echo "   📥 Downloading results..."
+                RESULTS_S3_PATH=$(aws sagemaker describe-ai-benchmark-job \
+                    --ai-benchmark-job-name "${JOB_NAME}" \
+                    --region "${AWS_REGION}" \
+                    --query 'OutputConfig.S3OutputLocation' \
+                    --output text 2>/dev/null)
+                if [ -n "${RESULTS_S3_PATH}" ]; then
+                    mkdir -p "${LOCAL_RESULTS_DIR}/output"
+                    aws s3 sync "${RESULTS_S3_PATH}" "${LOCAL_RESULTS_DIR}/output/" \
+                        --region "${AWS_REGION}" --quiet
+                    # Untar if output.tar.gz exists
+                    local tar_file
+                    tar_file=$(find "${LOCAL_RESULTS_DIR}" -name "output.tar.gz" -type f 2>/dev/null | head -1)
+                    if [ -n "${tar_file}" ]; then
+                        tar -xzf "${tar_file}" --strip-components=1 -C "${LOCAL_RESULTS_DIR}/output/" 2>/dev/null || true
+                    fi
+                    # Re-search after extraction
+                    RESULTS_JSONL=$(find "${LOCAL_RESULTS_DIR}" -name "profile_export.jsonl" -type f 2>/dev/null | head -1)
+                    echo "   ✅ Results downloaded to: benchmarks/${JOB_NAME}/"
+                fi
+            else
+                echo "   ✅ Results already available locally"
+            fi
+            # Write to Athena if CI bucket is configured and results exist
+            if [ -n "${CI_BENCHMARK_RESULTS_BUCKET:-}" ]; then
+                _WRITER_INPUT=""
+                if [ -n "${RESULTS_JSONL}" ] && [ -f "${RESULTS_JSONL}" ]; then
+                    _WRITER_INPUT="${RESULTS_JSONL}"
+                else
+                    _WRITER_INPUT=$(find "${LOCAL_RESULTS_DIR}" -name "profile_export_aiperf.json" -type f 2>/dev/null | head -1)
+                fi
+                if [ -n "${_WRITER_INPUT}" ]; then
+                    echo ""
+                    echo "   📊 Writing to Athena..."
+                    if python3 "$(dirname "${BASH_SOURCE[0]}")/.benchmark_writer.py" write \
+                        --results-file "${_WRITER_INPUT}" \
+                        --config-file "$(dirname "${BASH_SOURCE[0]}")/config" \
+                        --project-name "${PROJECT_NAME}" \
+                        --workload "${BENCHMARK_WORKLOAD:-manual}" \
+                        --concurrency "${BENCHMARK_CONCURRENCY:-2}" \
+                        --bucket "${CI_BENCHMARK_RESULTS_BUCKET}" \
+                        --region "${AWS_REGION:-${REGION}}"; then
+                        echo "   ✅ Results persisted to Athena"
+                    else
+                        echo "   ⚠️  Athena write failed (non-fatal)"
+                    fi
+                fi
+            fi
+            ;;
+        InProgress|Starting|Pending)
+            echo ""
+            echo "   Job is still running. Check again with: ./do/benchmark --status"
+            ;;
+        Failed)
+            FAILURE_REASON=$(aws sagemaker describe-ai-benchmark-job \
+                --ai-benchmark-job-name "${JOB_NAME}" \
+                --region "${AWS_REGION}" \
+                --query 'FailureReason' \
+                --output text 2>/dev/null) || FAILURE_REASON="unknown"
+            echo "   Reason: ${FAILURE_REASON}"
+            ;;
+    esac
+    exit 0
+fi
 # ── Require --workload flag ───────────────────────────────────────────────────
 if [ -z "${ARG_WORKLOAD}" ]; then
     echo "❌ --workload <name> is required"
@@ -172,8 +283,11 @@ print(f's3://{bucket}/${PROJECT_NAME}/')
 CI_BENCHMARK_RESULTS_BUCKET=$(echo "${_PROFILE_JSON}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('ciBenchmarkResultsBucket', ''))" 2>/dev/null) || CI_BENCHMARK_RESULTS_BUCKET=""
-# Derive job names at runtime (unique per invocation)
-BENCHMARK_JOB_NAME="${PROJECT_NAME}-benchmark-$(date +%Y%m%d-%H%M%S)"
+ROLE_ARN=$(echo "${_PROFILE_JSON}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('roleArn', ''))" 2>/dev/null) || ROLE_ARN=""
+# Derive job names at runtime (unique per invocation).
+# Preserve BENCHMARK_JOB_NAME if already set (from do/config or env) for resume logic.
+BENCHMARK_JOB_NAME="${BENCHMARK_JOB_NAME:-${PROJECT_NAME}-benchmark-$(date +%Y%m%d-%H%M%S)}"
 BENCHMARK_WORKLOAD_CONFIG_NAME="${PROJECT_NAME}-benchmark-config-$(date +%Y%m%d-%H%M%S)"
 # Ensure benchmark params have defaults (in case workload catalog wasn't found)
@@ -228,7 +342,15 @@ if [ -n "${BENCHMARK_CONCURRENCY_LEVELS:-}" ] && [ -z "${_BENCHMARK_SINGLE_LEVEL
             if [ -n "${IC_ARG}" ]; then _REINVOKE_ARGS="${_REINVOKE_ARGS} --ic ${IC_ARG}"; fi
             if [ -n "${ADAPTER_ARG}" ]; then _REINVOKE_ARGS="${_REINVOKE_ARGS} --adapter ${ADAPTER_ARG}"; fi
-            if "${BASH_SOURCE[0]}" ${_REINVOKE_ARGS}; then
+            _CHILD_EXIT=0
+            "${BASH_SOURCE[0]}" ${_REINVOKE_ARGS} || _CHILD_EXIT=$?
+            if [ ${_CHILD_EXIT} -eq 130 ]; then
+                # Child was interrupted (Ctrl+C) — propagate cleanly
+                exit 130
+            fi
+            if [ ${_CHILD_EXIT} -eq 0 ]; then
                 # Copy results to aggregation directory — find the child's results
                 # Try the marker file first (set by child), then fall back to ls -td
                 _LATEST_JOB_DIR=""
@@ -747,7 +869,10 @@ echo ""
 echo "⚙️  Step 1: Creating AI Workload Config: ${WORKLOAD_CONFIG_NAME}"
 # Build parameters block
-PARAMS_JSON="{\"prompt_input_tokens_mean\":${BENCHMARK_INPUT_TOKENS_MEAN},\"output_tokens_mean\":${BENCHMARK_OUTPUT_TOKENS_MEAN},\"concurrency\":${BENCHMARK_CONCURRENCY},\"streaming\":${BENCHMARK_STREAMING},\"tokenizer\":\"${MODEL_NAME}\""
+# Use HF_MODEL_ID for tokenizer (the original HuggingFace repo ID, e.g. "Qwen/Qwen3-0.6B").
+# MODEL_NAME may have been rewritten to an S3 URI by do/stage, which AIPerf can't use as a tokenizer source.
+BENCHMARK_TOKENIZER="${HF_MODEL_ID:-${MODEL_NAME}}"
+PARAMS_JSON="{\"prompt_input_tokens_mean\":${BENCHMARK_INPUT_TOKENS_MEAN},\"output_tokens_mean\":${BENCHMARK_OUTPUT_TOKENS_MEAN},\"concurrency\":${BENCHMARK_CONCURRENCY},\"streaming\":${BENCHMARK_STREAMING},\"tokenizer\":\"${BENCHMARK_TOKENIZER}\""
 # Add optional request_count if specified
 if [ -n "${BENCHMARK_REQUEST_COUNT:-}" ]; then
@@ -856,6 +981,18 @@ fi  # end of RESUME_EXISTING=false block
 # Skip polling if we already know the job completed (resumed a finished job)
 if [ "${JOB_STATUS:-}" != "Completed" ] && [ "${JOB_STATUS:-}" != "Failed" ] && [ "${JOB_STATUS:-}" != "Stopped" ]; then
+# Handle Ctrl+C during polling — exit cleanly without stopping the remote job.
+_handle_benchmark_interrupt() {
+    echo ""
+    echo ""
+    echo "⚠️  Interrupted — job continues running in background"
+    echo "   Job: ${BENCHMARK_JOB_NAME}"
+    echo ""
+    echo "   Check status:      aws sagemaker describe-ai-benchmark-job --ai-benchmark-job-name ${BENCHMARK_JOB_NAME} --region ${AWS_REGION}"
+    exit 130
+}
+trap '_handle_benchmark_interrupt' INT
 echo "⏳ Step 3: Waiting for benchmark to complete..."
 echo "   Polling every ${POLL_INTERVAL}s (max ${MAX_POLL_ATTEMPTS} attempts = 30 min)"
 echo ""
@@ -897,13 +1034,14 @@ while [ ${POLL_COUNT} -lt ${MAX_POLL_ATTEMPTS} ]; do
     esac
 done
+trap - INT
 # Check for timeout
 if [ ${POLL_COUNT} -ge ${MAX_POLL_ATTEMPTS} ]; then
     echo ""
     echo "⚠️  Benchmark timed out after 30 minutes (status: ${JOB_STATUS})"
-    echo "   The job may still be running. Re-run ./do/benchmark to resume waiting."
-    echo "   Or check status manually:"
-    echo "   aws sagemaker describe-ai-benchmark-job --ai-benchmark-job-name ${BENCHMARK_JOB_NAME} --region ${AWS_REGION}"
+    echo "   The job may still be running."
+    echo "   Check status: ./do/benchmark --status"
     exit 1
 fi
@@ -949,7 +1087,7 @@ if [ "${JOB_STATUS}" = "Completed" ]; then
             # Extract any tar.gz archives (benchmark service packages results as output.tar.gz)
             for ARCHIVE in $(find "${LOCAL_RESULTS_DIR}" -name "*.tar.gz" -type f 2>/dev/null); do
                 ARCHIVE_DIR=$(dirname "${ARCHIVE}")
-                tar -xzf "${ARCHIVE}" -C "${ARCHIVE_DIR}" 2>/dev/null || true
+                tar -xzf "${ARCHIVE}" --strip-components=1 -C "${ARCHIVE_DIR}" 2>/dev/null || true
             done
             # Look for specific result files (priority: JSONL > aiperf JSON)

package/templates/do/build CHANGED Viewed

@@ -12,11 +12,8 @@ source "${SCRIPT_DIR}/config"
 source "${SCRIPT_DIR}/lib/profile.sh"
 # ── Profile-resolved variables (env var > profile > default) ──────────────────
-# Disable unbound-variable checking for associative array access (bash 3.2 compat)
-set +u
-ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
-export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
-set -u
+ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE_ecrRepositoryName:-ml-container-creator}}"
+export AWS_REGION="${AWS_REGION:-${_PROFILE_awsRegion:-us-east-1}}"
 echo "🚀 Building Docker image for ${PROJECT_NAME}"
 echo "   Deployment config: ${DEPLOYMENT_CONFIG}"

package/templates/do/clean.d/async-inference.ejs CHANGED Viewed

@@ -12,11 +12,8 @@ source "${SCRIPT_DIR}/config"
 source "${SCRIPT_DIR}/lib/profile.sh"
 # ── Profile-resolved variables (env var > profile > default) ──────────────────
-# Disable unbound-variable checking for associative array access (bash 3.2 compat)
-set +u
-ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
-export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
-set -u
+ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE_ecrRepositoryName:-ml-container-creator}}"
+export AWS_REGION="${AWS_REGION:-${_PROFILE_awsRegion:-us-east-1}}"
 # Parse arguments
 CLEANUP_TARGET=""

package/templates/do/clean.d/batch-transform.ejs CHANGED Viewed

@@ -12,11 +12,8 @@ source "${SCRIPT_DIR}/config"
 source "${SCRIPT_DIR}/lib/profile.sh"
 # ── Profile-resolved variables (env var > profile > default) ──────────────────
-# Disable unbound-variable checking for associative array access (bash 3.2 compat)
-set +u
-ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
-export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
-set -u
+ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE_ecrRepositoryName:-ml-container-creator}}"
+export AWS_REGION="${AWS_REGION:-${_PROFILE_awsRegion:-us-east-1}}"
 # Parse arguments
 CLEANUP_TARGET=""

package/templates/do/clean.d/hyperpod-eks.ejs CHANGED Viewed

@@ -12,11 +12,8 @@ source "${SCRIPT_DIR}/config"
 source "${SCRIPT_DIR}/lib/profile.sh"
 # ── Profile-resolved variables (env var > profile > default) ──────────────────
-# Disable unbound-variable checking for associative array access (bash 3.2 compat)
-set +u
-ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
-export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
-set -u
+ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE_ecrRepositoryName:-ml-container-creator}}"
+export AWS_REGION="${AWS_REGION:-${_PROFILE_awsRegion:-us-east-1}}"
 # Parse arguments
 CLEANUP_TARGET=""

package/templates/do/clean.d/managed-inference.ejs CHANGED Viewed

@@ -12,11 +12,8 @@ source "${SCRIPT_DIR}/config"
 source "${SCRIPT_DIR}/lib/profile.sh"
 # ── Profile-resolved variables (env var > profile > default) ──────────────────
-# Disable unbound-variable checking for associative array access (bash 3.2 compat)
-set +u
-ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
-export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
-set -u
+ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE_ecrRepositoryName:-ml-container-creator}}"
+export AWS_REGION="${AWS_REGION:-${_PROFILE_awsRegion:-us-east-1}}"
 # Parse arguments
 CLEANUP_TARGET=""

package/templates/do/config CHANGED Viewed

@@ -214,6 +214,9 @@ export <%= key %>=${<%= key %>:-<%= value %>}
 # Framework-specific configuration
 <% if (framework === 'transformers') { %>
 export MODEL_NAME="<%= modelName %>"
+# HuggingFace Model ID — preserved even after do/stage rewrites MODEL_NAME to S3.
+# Used by do/benchmark (tokenizer), do/tune (model catalog), and do/test (chat template).
+export HF_MODEL_ID="<%= modelName %>"
 # Secrets Manager integration: when an ARN is configured, do-scripts resolve the
 # secret at the appropriate stage (build-time or runtime). When a plaintext value
 # is configured, it is exported directly. The _ARN suffix signals resolution is needed.
@@ -253,6 +256,7 @@ export TUNE_MODEL_ID="<%= tuneModelId %>"
 <% if (framework === 'diffusors') { %>
 export MODEL_NAME="<%= modelName %>"
+export HF_MODEL_ID="<%= modelName %>"
 # Secrets Manager integration: when an ARN is configured, do-scripts resolve the
 # secret at the appropriate stage (build-time or runtime). When a plaintext value
 # is configured, it is exported directly. The _ARN suffix signals resolution is needed.

package/templates/do/deploy.d/async-inference.ejs CHANGED Viewed

@@ -41,18 +41,15 @@ source "${SCRIPT_DIR}/config"
 source "${SCRIPT_DIR}/lib/profile.sh"
 # ── Profile-resolved variables (env var > profile > default) ──────────────────
-# Disable unbound-variable checking for associative array access (bash 3.2 compat)
-set +u
-ROLE_ARN="${ROLE_ARN:-${_PROFILE[roleArn]:-}}"
-ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
-export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
+ROLE_ARN="${ROLE_ARN:-${_PROFILE_roleArn:-}}"
+ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE_ecrRepositoryName:-ml-container-creator}}"
+export AWS_REGION="${AWS_REGION:-${_PROFILE_awsRegion:-us-east-1}}"
 # Async-specific derived variables
-_ASYNC_BUCKET="${_PROFILE[asyncS3Bucket]:-mlcc-async-${_PROFILE[accountId]:-unknown}-${_PROFILE[awsRegion]:-us-east-1}}"
+_ASYNC_BUCKET="${_PROFILE_asyncS3Bucket:-mlcc-async-${_PROFILE_accountId:-unknown}-${_PROFILE_awsRegion:-us-east-1}}"
 ASYNC_S3_OUTPUT_PATH="${ASYNC_S3_OUTPUT_PATH:-s3://${_ASYNC_BUCKET}/${PROJECT_NAME}/output/}"
-ASYNC_SNS_SUCCESS_TOPIC="${ASYNC_SNS_SUCCESS_TOPIC:-arn:aws:sns:${_PROFILE[awsRegion]:-us-east-1}:${_PROFILE[accountId]:-unknown}:ml-container-creator-${PROJECT_NAME}-async-success}"
-ASYNC_SNS_ERROR_TOPIC="${ASYNC_SNS_ERROR_TOPIC:-arn:aws:sns:${_PROFILE[awsRegion]:-us-east-1}:${_PROFILE[accountId]:-unknown}:ml-container-creator-${PROJECT_NAME}-async-error}"
-set -u
+ASYNC_SNS_SUCCESS_TOPIC="${ASYNC_SNS_SUCCESS_TOPIC:-arn:aws:sns:${_PROFILE_awsRegion:-us-east-1}:${_PROFILE_accountId:-unknown}:ml-container-creator-${PROJECT_NAME}-async-success}"
+ASYNC_SNS_ERROR_TOPIC="${ASYNC_SNS_ERROR_TOPIC:-arn:aws:sns:${_PROFILE_awsRegion:-us-east-1}:${_PROFILE_accountId:-unknown}:ml-container-creator-${PROJECT_NAME}-async-error}"
 echo "🚀 Deploying to AWS"
 echo "   Project: ${PROJECT_NAME}"

package/templates/do/deploy.d/batch-transform.ejs CHANGED Viewed

@@ -41,17 +41,14 @@ source "${SCRIPT_DIR}/config"
 source "${SCRIPT_DIR}/lib/profile.sh"
 # ── Profile-resolved variables (env var > profile > default) ──────────────────
-# Disable unbound-variable checking for associative array access (bash 3.2 compat)
-set +u
-ROLE_ARN="${ROLE_ARN:-${_PROFILE[roleArn]:-}}"
-ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
-export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
+ROLE_ARN="${ROLE_ARN:-${_PROFILE_roleArn:-}}"
+ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE_ecrRepositoryName:-ml-container-creator}}"
+export AWS_REGION="${AWS_REGION:-${_PROFILE_awsRegion:-us-east-1}}"
 # Batch-specific derived variables
-_BATCH_BUCKET="${_PROFILE[batchS3Bucket]:-mlcc-batch-${_PROFILE[accountId]:-unknown}-${_PROFILE[awsRegion]:-us-east-1}}"
+_BATCH_BUCKET="${_PROFILE_batchS3Bucket:-mlcc-batch-${_PROFILE_accountId:-unknown}-${_PROFILE_awsRegion:-us-east-1}}"
 BATCH_INPUT_PATH="${BATCH_INPUT_PATH:-s3://${_BATCH_BUCKET}/${PROJECT_NAME}/input/}"
 BATCH_OUTPUT_PATH="${BATCH_OUTPUT_PATH:-s3://${_BATCH_BUCKET}/${PROJECT_NAME}/output/}"
-set -u
 echo "🚀 Deploying to AWS"
 echo "   Project: ${PROJECT_NAME}"

package/templates/do/deploy.d/hyperpod-eks.ejs CHANGED Viewed

@@ -41,10 +41,7 @@ source "${SCRIPT_DIR}/config"
 source "${SCRIPT_DIR}/lib/profile.sh"
 # ── Profile-resolved variables (env var > profile > default) ──────────────────
-# Disable unbound-variable checking for associative array access (bash 3.2 compat)
-set +u
-export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
-set -u
+export AWS_REGION="${AWS_REGION:-${_PROFILE_awsRegion:-us-east-1}}"
 echo "🚀 Deploying to AWS"
 echo "   Project: ${PROJECT_NAME}"

package/templates/do/deploy.d/managed-inference.ejs CHANGED Viewed

@@ -214,12 +214,9 @@ source "${SCRIPT_DIR}/config"
 source "${SCRIPT_DIR}/lib/profile.sh"
 # ── Profile-resolved variables (env var > profile > default) ──────────────────
-# Disable unbound-variable checking for associative array access (bash 3.2 compat)
-set +u
-ROLE_ARN="${ROLE_ARN:-${_PROFILE[roleArn]:-}}"
-ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
-export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
-set -u
+ROLE_ARN="${ROLE_ARN:-${_PROFILE_roleArn:-}}"
+ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE_ecrRepositoryName:-ml-container-creator}}"
+export AWS_REGION="${AWS_REGION:-${_PROFILE_awsRegion:-us-east-1}}"
 echo "🚀 Deploying to AWS"
 echo "   Project: ${PROJECT_NAME}"
@@ -332,6 +329,18 @@ if [ -z "${ROLE_ARN:-}" ]; then
     exit 3
 fi
+# Validate ROLE_ARN looks like an IAM role ARN
+if ! echo "${ROLE_ARN}" | grep -qE '^arn:aws[a-z-]*:iam::[0-9]{12}:role/.+'; then
+    echo "❌ ROLE_ARN is not a valid IAM role ARN:"
+    echo "   Got: ${ROLE_ARN}"
+    echo "   Expected format: arn:aws:iam::123456789012:role/RoleName"
+    echo ""
+    echo "   This may indicate a misconfigured bootstrap profile."
+    echo "   Check ~/.ml-container-creator/config.json 'roleArn' field,"
+    echo "   or set the correct value: export ROLE_ARN=arn:aws:iam::ACCOUNT:role/YOUR_ROLE"
+    exit 3
+fi
 echo "   Using execution role: ${ROLE_ARN}"
 # Validate --ic argument if specified (set by --ic <name> or --force-ic <name>)