npm - @aws/ml-container-creator - Versions diffs - 0.10.0 → 0.12.1 - Mend

@aws/ml-container-creator 0.10.0 → 0.12.1

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (90)

package/LICENSE-THIRD-PARTY +9304 -0
package/bin/cli.js +2 -0
package/config/bootstrap-e2e-stack.json +341 -0
package/config/bootstrap-stack.json +40 -3
package/config/parameter-schema-v2.json +33 -22
package/config/tune-catalog.json +1781 -0
package/infra/ci-harness/buildspec.yml +1 -0
package/infra/ci-harness/lambda/path-prover/brain.ts +306 -0
package/infra/ci-harness/lambda/path-prover/write-results.ts +152 -0
package/infra/ci-harness/lib/ci-harness-stack.ts +851 -7
package/infra/ci-harness/state-machines/path-prover.asl.json +496 -0
package/package.json +53 -67
package/servers/base-image-picker/index.js +121 -121
package/servers/e2e-status/index.js +297 -0
package/servers/e2e-status/manifest.json +14 -0
package/servers/e2e-status/package.json +15 -0
package/servers/endpoint-picker/LICENSE +202 -0
package/servers/endpoint-picker/index.js +536 -0
package/servers/endpoint-picker/manifest.json +14 -0
package/servers/endpoint-picker/package.json +18 -0
package/servers/hyperpod-cluster-picker/index.js +125 -125
package/servers/instance-sizer/index.js +166 -153
package/servers/instance-sizer/lib/instance-ranker.js +120 -76
package/servers/instance-sizer/lib/model-resolver.js +61 -61
package/servers/instance-sizer/lib/quota-resolver.js +113 -113
package/servers/instance-sizer/lib/vram-estimator.js +31 -31
package/servers/lib/bedrock-client.js +38 -38
package/servers/lib/catalogs/instances.json +27 -0
package/servers/lib/catalogs/model-servers.json +201 -3
package/servers/lib/custom-validators.js +13 -13
package/servers/lib/dynamic-resolver.js +4 -4
package/servers/marketplace-picker/index.js +342 -0
package/servers/marketplace-picker/manifest.json +14 -0
package/servers/marketplace-picker/package.json +18 -0
package/servers/model-picker/index.js +382 -382
package/servers/region-picker/index.js +56 -56
package/servers/workload-picker/LICENSE +202 -0
package/servers/workload-picker/catalogs/workload-profiles.json +67 -0
package/servers/workload-picker/index.js +171 -0
package/servers/workload-picker/manifest.json +16 -0
package/servers/workload-picker/package.json +16 -0
package/src/app.js +12 -3
package/src/lib/bootstrap-command-handler.js +609 -15
package/src/lib/bootstrap-config.js +36 -0
package/src/lib/bootstrap-profile-manager.js +48 -41
package/src/lib/ci-register-helpers.js +74 -0
package/src/lib/config-loader.js +3 -0
package/src/lib/config-manager.js +7 -0
package/src/lib/config-validator.js +1 -1
package/src/lib/cuda-resolver.js +17 -8
package/src/lib/generated/cli-options.js +319 -314
package/src/lib/generated/parameter-matrix.js +672 -661
package/src/lib/generated/validation-rules.js +76 -72
package/src/lib/path-prover-brain.js +664 -0
package/src/lib/prompts/infrastructure-prompts.js +2 -2
package/src/lib/prompts/model-prompts.js +6 -0
package/src/lib/prompts/project-prompts.js +12 -0
package/src/lib/secrets-prompt-runner.js +4 -0
package/src/lib/template-manager.js +1 -1
package/src/lib/template-variable-resolver.js +87 -1
package/src/lib/tune-catalog-validator.js +37 -4
package/templates/Dockerfile +9 -0
package/templates/code/adapter_sidecar.py +444 -0
package/templates/code/serve +6 -0
package/templates/code/serve.d/vllm.ejs +1 -1
package/templates/do/.benchmark_writer.py +1476 -0
package/templates/do/.tune_helper.py +982 -57
package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
package/templates/do/adapter +154 -0
package/templates/do/benchmark +639 -85
package/templates/do/build +5 -0
package/templates/do/clean.d/async-inference.ejs +5 -0
package/templates/do/clean.d/batch-transform.ejs +5 -0
package/templates/do/clean.d/hyperpod-eks.ejs +5 -0
package/templates/do/clean.d/managed-inference.ejs +5 -0
package/templates/do/config +115 -45
package/templates/do/deploy.d/async-inference.ejs +30 -3
package/templates/do/deploy.d/batch-transform.ejs +29 -3
package/templates/do/deploy.d/hyperpod-eks.ejs +4 -0
package/templates/do/deploy.d/managed-inference.ejs +216 -14
package/templates/do/lib/endpoint-config.sh +1 -1
package/templates/do/lib/profile.sh +44 -0
package/templates/do/optimize +106 -37
package/templates/do/push +5 -0
package/templates/do/register +94 -0
package/templates/do/stage +567 -0
package/templates/do/submit +7 -0
package/templates/do/test +14 -0
package/templates/do/tune +382 -59
package/templates/do/validate +44 -4

package/templates/do/build CHANGED Viewed

@@ -9,6 +9,11 @@ set -o pipefail
 # Source configuration
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 source "${SCRIPT_DIR}/config"
+source "${SCRIPT_DIR}/lib/profile.sh"
+# ── Profile-resolved variables (env var > profile > default) ──────────────────
+ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
+export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
 echo "🚀 Building Docker image for ${PROJECT_NAME}"
 echo "   Deployment config: ${DEPLOYMENT_CONFIG}"

package/templates/do/clean.d/async-inference.ejs CHANGED Viewed

@@ -9,6 +9,11 @@ set -o pipefail
 # Source configuration
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 source "${SCRIPT_DIR}/config"
+source "${SCRIPT_DIR}/lib/profile.sh"
+# ── Profile-resolved variables (env var > profile > default) ──────────────────
+ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
+export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
 # Parse arguments
 CLEANUP_TARGET=""

package/templates/do/clean.d/batch-transform.ejs CHANGED Viewed

@@ -9,6 +9,11 @@ set -o pipefail
 # Source configuration
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 source "${SCRIPT_DIR}/config"
+source "${SCRIPT_DIR}/lib/profile.sh"
+# ── Profile-resolved variables (env var > profile > default) ──────────────────
+ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
+export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
 # Parse arguments
 CLEANUP_TARGET=""

package/templates/do/clean.d/hyperpod-eks.ejs CHANGED Viewed

@@ -9,6 +9,11 @@ set -o pipefail
 # Source configuration
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 source "${SCRIPT_DIR}/config"
+source "${SCRIPT_DIR}/lib/profile.sh"
+# ── Profile-resolved variables (env var > profile > default) ──────────────────
+ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
+export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
 # Parse arguments
 CLEANUP_TARGET=""

package/templates/do/clean.d/managed-inference.ejs CHANGED Viewed

@@ -9,6 +9,11 @@ set -o pipefail
 # Source configuration
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 source "${SCRIPT_DIR}/config"
+source "${SCRIPT_DIR}/lib/profile.sh"
+# ── Profile-resolved variables (env var > profile > default) ──────────────────
+ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
+export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
 # Parse arguments
 CLEANUP_TARGET=""

package/templates/do/config CHANGED Viewed

@@ -1,6 +1,7 @@
 #!/bin/bash
 # do-framework configuration
 # This file is sourced by all do scripts
+# Generated: <%= new Date().toISOString() %>
 # Project identification
 export PROJECT_NAME="<%= projectName %>"
@@ -10,21 +11,25 @@ export DEPLOYMENT_CONFIG="<%= deploymentConfig %>"
 export FRAMEWORK="<%= framework %>"
 export MODEL_SERVER="<%= modelServer %>"
+# AWS configuration
+export AWS_REGION=${AWS_REGION:-<%= awsRegion %>}
+# ── Profile-resolved values (from ~/.ml-container-creator/config.json) ────
+# ECR_REPOSITORY_NAME, ROLE_ARN, ADAPTER_S3_BUCKET — see do/lib/profile.sh
 <% if (typeof enableLora !== 'undefined' && enableLora) { %>
 # LoRA adapter serving
 export ENABLE_LORA=true
-export ADAPTER_S3_BUCKET="mlcc-adapters-$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo 'UNKNOWN')-${AWS_REGION}"
+<% } else if (framework === 'transformers' || framework === 'diffusors') { %>
+# LoRA adapter serving (uncomment to enable)
+# export ENABLE_LORA=true
 <% } %>
-# AWS configuration
-export AWS_REGION="<%= awsRegion %>"
-export ECR_REPOSITORY_NAME="ml-container-creator"
 # Build configuration — WHERE the Docker image gets built
 export BUILD_TARGET="<%= buildTarget %>"
 <% if (buildTarget === 'codebuild') { %>
 export CODEBUILD_COMPUTE_TYPE="<%= codebuildComputeType %>"
-export CODEBUILD_PROJECT_NAME="${PROJECT_NAME}-build-$(date +%Y%m%d)"
+# CODEBUILD_PROJECT_NAME — derived in do/submit at runtime
 <% } %>
 # Deployment configuration — WHERE the model runs
@@ -42,14 +47,27 @@ export INSTANCE_TYPE="<%= instanceType %>"
 # Instance pools: heterogeneous instance types with priority-based fallback
 # Priority = selection order (1 = preferred, higher = fallback)
 export INSTANCE_POOLS='<%= JSON.stringify(instancePools) %>'
+<% } else { %>
+# Instance pools: heterogeneous instance types with priority-based fallback (uncomment to enable)
+# Format: [{"InstanceType":"ml.g6e.48xlarge","Priority":1},{"InstanceType":"ml.g5.48xlarge","Priority":2}]
+# export INSTANCE_POOLS='[]'
 <% } %>
 <% if (inferenceAmiVersion) { %>
 export INFERENCE_AMI_VERSION="<%= inferenceAmiVersion %>"
+<% } else { %>
+# Inference AMI version — auto-resolved from CUDA version (uncomment to override)
+# Valid: al2-ami-sagemaker-inference-gpu-2, al2-ami-sagemaker-inference-gpu-2-1,
+#        al2-ami-sagemaker-inference-gpu-3-1, al2023-ami-sagemaker-inference-gpu-4-1
+# export INFERENCE_AMI_VERSION=""
 <% } %>
 <% if (typeof capacityReservationArn !== 'undefined' && capacityReservationArn) { %>
 # Note: Capacity reservations and instance pools (INSTANCE_POOLS) are mutually exclusive.
 # If both are set, the capacity reservation takes precedence and INSTANCE_POOLS is ignored.
 export CAPACITY_RESERVATION_ARN="<%= capacityReservationArn %>"
+<% } else { %>
+# Capacity reservation (uncomment to use reserved capacity)
+# Note: Mutually exclusive with INSTANCE_POOLS — reservation takes precedence.
+# export CAPACITY_RESERVATION_ARN=""
 <% } %>
 <% } %>
 <% } %>
@@ -59,32 +77,21 @@ export CAPACITY_RESERVATION_ARN="<%= capacityReservationArn %>"
 export INSTANCE_TYPE="<%= instanceType %>"
 <% if (inferenceAmiVersion) { %>
 export INFERENCE_AMI_VERSION="<%= inferenceAmiVersion %>"
-<% } %>
-# Async-specific configuration
-# Resolve AWS account ID at runtime for default resource names
-ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo "UNKNOWN")
-<% if (asyncS3OutputPath) { %>
-export ASYNC_S3_OUTPUT_PATH="<%= asyncS3OutputPath %>"
-<% } else { %>
-export ASYNC_S3_OUTPUT_PATH="s3://mlcc-async-${ACCOUNT_ID}-${AWS_REGION}/${PROJECT_NAME}/output/"
-<% } %>
-<% if (asyncSnsSuccessTopic) { %>
-export ASYNC_SNS_SUCCESS_TOPIC="<%= asyncSnsSuccessTopic %>"
 <% } else { %>
-export ASYNC_SNS_SUCCESS_TOPIC="arn:aws:sns:${AWS_REGION}:${ACCOUNT_ID}:ml-container-creator-${PROJECT_NAME}-async-success"
+# Inference AMI version — auto-resolved from CUDA version (uncomment to override)
+# export INFERENCE_AMI_VERSION=""
 <% } %>
-<% if (asyncSnsErrorTopic) { %>
-export ASYNC_SNS_ERROR_TOPIC="<%= asyncSnsErrorTopic %>"
-<% } else { %>
-export ASYNC_SNS_ERROR_TOPIC="arn:aws:sns:${AWS_REGION}:${ACCOUNT_ID}:ml-container-creator-${PROJECT_NAME}-async-error"
-<% } %>
+# Async-specific configuration
+# ── Profile-resolved values (from ~/.ml-container-creator/config.json) ────
+# ASYNC_S3_OUTPUT_PATH, ASYNC_SNS_SUCCESS_TOPIC, ASYNC_SNS_ERROR_TOPIC — see do/lib/profile.sh
+# ACCOUNT_ID — derived inline in consuming scripts (do/deploy.d/async-inference)
 <% if (asyncMaxConcurrentInvocations) { %>
 export ASYNC_MAX_CONCURRENT_INVOCATIONS="<%= asyncMaxConcurrentInvocations %>"
+<% } else { %>
+# Max concurrent invocations per instance (uncomment to set)
+# export ASYNC_MAX_CONCURRENT_INVOCATIONS=""
 <% } %>
 <% } %>
@@ -95,6 +102,9 @@ export HYPERPOD_NAMESPACE="<%= hyperPodNamespace %>"
 export HYPERPOD_REPLICAS="<%= hyperPodReplicas %>"
 <% if (fsxVolumeHandle) { %>
 export FSX_VOLUME_HANDLE="<%= fsxVolumeHandle %>"
+<% } else { %>
+# FSx for Lustre volume for shared model storage (uncomment to enable)
+# export FSX_VOLUME_HANDLE=""
 <% } %>
 <% } %>
@@ -102,28 +112,25 @@ export FSX_VOLUME_HANDLE="<%= fsxVolumeHandle %>"
 # SageMaker Batch Transform configuration
 export INSTANCE_TYPE="<%= instanceType %>"
-# Resolve AWS account ID at runtime for default resource names
-ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo "UNKNOWN")
+# ── Profile-resolved values (from ~/.ml-container-creator/config.json) ────
+# BATCH_INPUT_PATH, BATCH_OUTPUT_PATH — see do/lib/profile.sh
+# ACCOUNT_ID — derived inline in consuming scripts (do/deploy.d/batch-transform)
-<% if (batchInputPath) { %>
-export BATCH_INPUT_PATH="<%= batchInputPath %>"
-<% } else { %>
-export BATCH_INPUT_PATH="s3://mlcc-batch-${ACCOUNT_ID}-${AWS_REGION}/${PROJECT_NAME}/input/"
-<% } %>
-<% if (batchOutputPath) { %>
-export BATCH_OUTPUT_PATH="<%= batchOutputPath %>"
-<% } else { %>
-export BATCH_OUTPUT_PATH="s3://mlcc-batch-${ACCOUNT_ID}-${AWS_REGION}/${PROJECT_NAME}/output/"
-<% } %>
 export BATCH_INSTANCE_COUNT="<%= batchInstanceCount %>"
 export BATCH_SPLIT_TYPE="<%= batchSplitType %>"
 export BATCH_STRATEGY="<%= batchStrategy %>"
 export BATCH_JOIN_SOURCE="<%= batchJoinSource || 'None' %>"
 <% if (batchMaxConcurrentTransforms) { %>
 export BATCH_MAX_CONCURRENT_TRANSFORMS="<%= batchMaxConcurrentTransforms %>"
+<% } else { %>
+# Max concurrent transforms per instance (uncomment to set)
+# export BATCH_MAX_CONCURRENT_TRANSFORMS=""
 <% } %>
 <% if (batchMaxPayloadInMB) { %>
 export BATCH_MAX_PAYLOAD_IN_MB="<%= batchMaxPayloadInMB %>"
+<% } else { %>
+# Max payload size in MB (uncomment to set, default: 6)
+# export BATCH_MAX_PAYLOAD_IN_MB=""
 <% } %>
 <% } %>
@@ -140,6 +147,22 @@ export ENDPOINT_VARIANT_NAME="<%= endpointVariantName %>"
 export ENDPOINT_VOLUME_SIZE="<%= endpointVolumeSize %>"
 <% } %>
+<% if (deploymentTarget === 'realtime-inference' || deploymentTarget === 'async-inference') { %>
+# ─── Endpoint overrides (uncomment to customize) ───────────────────────────────
+<% if (typeof endpointInitialInstanceCount === 'undefined' || endpointInitialInstanceCount == null) { %>
+# export ENDPOINT_INITIAL_INSTANCE_COUNT="1"    # Number of instances for the endpoint
+<% } %>
+<% if (typeof endpointDataCapturePercent === 'undefined' || endpointDataCapturePercent == null) { %>
+# export ENDPOINT_DATA_CAPTURE_PERCENT=""        # Percentage of requests to capture (0-100)
+<% } %>
+<% if (typeof endpointVariantName === 'undefined' || endpointVariantName == null) { %>
+# export ENDPOINT_VARIANT_NAME=""                # Custom variant name (default: AllTraffic)
+<% } %>
+<% if (typeof endpointVolumeSize === 'undefined' || endpointVolumeSize == null) { %>
+# export ENDPOINT_VOLUME_SIZE=""                 # EBS volume size in GB for model download
+<% } %>
+<% } %>
 <% if (typeof icCpuCount !== 'undefined' && icCpuCount != null) { %>
 export IC_CPU_COUNT="<%= icCpuCount %>"
 <% } %>
@@ -158,6 +181,22 @@ export IC_COPY_COUNT="<%= icCopyCount %>"
 export IC_MODEL_WEIGHT="<%= icModelWeight %>"
 <% } %>
+<% if (deploymentTarget === 'realtime-inference' || deploymentTarget === 'async-inference') { %>
+# ─── Inference Component overrides (uncomment to customize) ────────────────────
+<% if (typeof icCpuCount === 'undefined' || icCpuCount == null) { %>
+# export IC_CPU_COUNT=""                         # CPU cores reserved for this IC
+<% } %>
+<% if (typeof icMemorySize === 'undefined' || icMemorySize == null) { %>
+# export IC_MEMORY_SIZE=""                       # Memory in MB reserved for this IC
+<% } %>
+<% if (typeof icCopyCount === 'undefined' || icCopyCount == null) { %>
+# export IC_COPY_COUNT=""                        # Number of model copies (multi-IC scaling)
+<% } %>
+<% if (typeof icModelWeight === 'undefined' || icModelWeight == null) { %>
+# export IC_MODEL_WEIGHT=""                      # Traffic weight for this IC (0-100)
+<% } %>
+<% } %>
 <% if (typeof modelEnvVars !== 'undefined' && modelEnvVars && Object.keys(modelEnvVars).length > 0) { %>
 # Model environment variables
 <% Object.entries(modelEnvVars).forEach(([key, value]) => { %>
@@ -191,8 +230,24 @@ export NGC_API_KEY="<%= ngcApiKey %>"
 <% if (deploymentTarget !== 'batch-transform') { %>
 # Managed Model Customization (do/tune)
+# ── Profile-resolved values (from ~/.ml-container-creator/config.json) ────
+# TUNE_S3_BUCKET — see do/lib/profile.sh
 export TUNE_SUPPORTED=<%= (typeof tuneSupported !== 'undefined' && tuneSupported) ? 'true' : 'false' %>
-export TUNE_S3_BUCKET="mlcc-tune-$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo 'UNKNOWN')-${AWS_REGION}"
+<% if (typeof tuneSupported !== 'undefined' && tuneSupported) { %>
+<% if (typeof tuneModelId !== 'undefined' && tuneModelId) { %>
+# SageMaker AI Managed Fine-Tuning — JumpStart Hub model ID
+# Flow: JumpStart model (tune) → LoRA adapter (S3) → do/adapter add → vLLM
+export TUNE_MODEL_ID="<%= tuneModelId %>"
+<% } else { %>
+# SageMaker AI Managed Fine-Tuning — JumpStart Hub model ID
+# To find your model's Hub ID:
+#   aws sagemaker list-hub-contents --hub-name SageMakerPublicHub \
+#     --hub-content-type Model --query "HubContentSummaries[].HubContentName"
+# export TUNE_MODEL_ID=""
+<% } %>
+<% } %>
+# MLflow App ARN for experiment tracking (set by bootstrap, or override manually)
+# export MLFLOW_APP_ARN=""
 <% } %>
 <% } %>
@@ -210,10 +265,10 @@ export HF_TOKEN="<%= hfToken %>"
 <% if (modelFormat) { %>
 export MODEL_FORMAT="<%= modelFormat %>"
-<% } %>
-<% if (roleArn) { %>
-export ROLE_ARN="<%= roleArn %>"
+<% } else { %>
+# Model format (uncomment if using quantized models)
+# Valid: pkl, json, keras, safetensors, gguf, awq, gptq
+# export MODEL_FORMAT=""
 <% } %>
 <% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
@@ -234,6 +289,23 @@ export BENCHMARK_S3_OUTPUT_PATH="s3://mlcc-benchmark-$(aws sts get-caller-identi
 <% } %>
 export BENCHMARK_JOB_NAME=""
 export BENCHMARK_WORKLOAD_CONFIG_NAME=""
+# CI Benchmark Athena persistence (set automatically from bootstrap --benchmark-infra)
+<% if (typeof ciBenchmarkResultsBucket !== 'undefined' && ciBenchmarkResultsBucket) { %>
+export CI_BENCHMARK_RESULTS_BUCKET="<%= ciBenchmarkResultsBucket %>"
+<% } else { %>
+# export CI_BENCHMARK_RESULTS_BUCKET=""            # S3 bucket for Athena Parquet results (set by bootstrap --benchmark-infra)
+<% } %>
+<% } else if (framework === 'transformers' && deploymentTarget !== 'batch-transform') { %>
+# ─── SageMaker AI Benchmarking (uncomment to enable) ──────────────────────────
+# export BENCHMARK_CONCURRENCY="10"              # Concurrent requests
+# export BENCHMARK_INPUT_TOKENS_MEAN="550"       # Mean input tokens per request
+# export BENCHMARK_OUTPUT_TOKENS_MEAN="150"      # Mean output tokens per request
+# export BENCHMARK_STREAMING="true"              # Enable streaming
+# export BENCHMARK_REQUEST_COUNT=""              # Total requests (empty = auto)
+# export BENCHMARK_S3_OUTPUT_PATH=""             # S3 path for results (empty = auto)
+# export BENCHMARK_JOB_NAME=""                   # Resume/check existing job
+# export BENCHMARK_WORKLOAD_CONFIG_NAME=""       # Reuse existing workload config
 <% } %>
 <% if (orderedEnvVars && orderedEnvVars.length > 0) { %>
@@ -246,11 +318,9 @@ export <%= key %>=${<%= key %>:-<%= value %>}
 export BASE_IMAGE=${BASE_IMAGE:-<%= baseImage || '' %>}
 # Allow environment variable overrides
-export AWS_REGION=${AWS_REGION:-<%= awsRegion %>}
 <% if ((deploymentTarget === 'realtime-inference' && !(typeof existingEndpointName !== 'undefined' && existingEndpointName)) || deploymentTarget === 'async-inference' || deploymentTarget === 'batch-transform') { %>
 export INSTANCE_TYPE=${INSTANCE_TYPE:-<%= instanceType %>}
 <% } %>
-export ECR_REPOSITORY_NAME=${ECR_REPOSITORY_NAME:-ml-container-creator}
 # Print configuration summary
 echo "⚙️  Configuration loaded"

package/templates/do/deploy.d/async-inference.ejs CHANGED Viewed

@@ -38,6 +38,18 @@ done
 # Source configuration
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 source "${SCRIPT_DIR}/config"
+source "${SCRIPT_DIR}/lib/profile.sh"
+# ── Profile-resolved variables (env var > profile > default) ──────────────────
+ROLE_ARN="${ROLE_ARN:-${_PROFILE[roleArn]:-}}"
+ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
+export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
+# Async-specific derived variables
+_ASYNC_BUCKET="${_PROFILE[asyncS3Bucket]:-mlcc-async-${_PROFILE[accountId]:-unknown}-${_PROFILE[awsRegion]:-us-east-1}}"
+ASYNC_S3_OUTPUT_PATH="${ASYNC_S3_OUTPUT_PATH:-s3://${_ASYNC_BUCKET}/${PROJECT_NAME}/output/}"
+ASYNC_SNS_SUCCESS_TOPIC="${ASYNC_SNS_SUCCESS_TOPIC:-arn:aws:sns:${_PROFILE[awsRegion]:-us-east-1}:${_PROFILE[accountId]:-unknown}:ml-container-creator-${PROJECT_NAME}-async-success}"
+ASYNC_SNS_ERROR_TOPIC="${ASYNC_SNS_ERROR_TOPIC:-arn:aws:sns:${_PROFILE[awsRegion]:-us-east-1}:${_PROFILE[accountId]:-unknown}:ml-container-creator-${PROJECT_NAME}-async-error}"
 echo "🚀 Deploying to AWS"
 echo "   Project: ${PROJECT_NAME}"
@@ -137,16 +149,31 @@ source "${SCRIPT_DIR}/lib/wait.sh"
 # Resolve container secrets (HF_TOKEN, NGC_API_KEY)
 resolve_secrets
+<% if (typeof serverEnvVars !== 'undefined' && serverEnvVars && Object.keys(serverEnvVars).length > 0) { %>
+# ============================================================
+# Inject server environment variables into container Environment
+# ============================================================
+<% Object.keys(serverEnvVars).forEach(function(key) { %>
+if [ -n "${<%= key %>:-}" ]; then
+    if [ -n "${CONTAINER_ENV_JSON}" ]; then
+        CONTAINER_ENV_JSON="${CONTAINER_ENV_JSON},\"<%= key %>\":\"${<%= key %>}\""
+    else
+        CONTAINER_ENV_JSON="\"<%= key %>\":\"${<%= key %>}\""
+    fi
+fi
+<% }); %>
+<% } %>
 # Validate execution role ARN
 if [ -z "${ROLE_ARN:-}" ]; then
-    echo "❌ Execution role ARN not provided"
+    echo "❌ ROLE_ARN is not set."
+    echo "   Run 'ml-container-creator bootstrap' to configure your profile,"
+    echo "   or set ROLE_ARN as an environment variable."
     echo ""
     echo "Usage:"
     echo "  export ROLE_ARN=arn:aws:iam::ACCOUNT_ID:role/YOUR_ROLE"
     echo "  ./do/deploy"
     echo ""
-    echo "Or set ROLE_ARN in do/config"
-    echo ""
     echo "The execution role must have permissions for:"
     echo "  • SageMaker model and endpoint management"
     echo "  • ECR image access"

package/templates/do/deploy.d/batch-transform.ejs CHANGED Viewed

@@ -38,6 +38,17 @@ done
 # Source configuration
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 source "${SCRIPT_DIR}/config"
+source "${SCRIPT_DIR}/lib/profile.sh"
+# ── Profile-resolved variables (env var > profile > default) ──────────────────
+ROLE_ARN="${ROLE_ARN:-${_PROFILE[roleArn]:-}}"
+ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
+export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
+# Batch-specific derived variables
+_BATCH_BUCKET="${_PROFILE[batchS3Bucket]:-mlcc-batch-${_PROFILE[accountId]:-unknown}-${_PROFILE[awsRegion]:-us-east-1}}"
+BATCH_INPUT_PATH="${BATCH_INPUT_PATH:-s3://${_BATCH_BUCKET}/${PROJECT_NAME}/input/}"
+BATCH_OUTPUT_PATH="${BATCH_OUTPUT_PATH:-s3://${_BATCH_BUCKET}/${PROJECT_NAME}/output/}"
 echo "🚀 Deploying to AWS"
 echo "   Project: ${PROJECT_NAME}"
@@ -135,16 +146,31 @@ source "${SCRIPT_DIR}/lib/wait.sh"
 # Resolve container secrets (HF_TOKEN, NGC_API_KEY)
 resolve_secrets
+<% if (typeof serverEnvVars !== 'undefined' && serverEnvVars && Object.keys(serverEnvVars).length > 0) { %>
+# ============================================================
+# Inject server environment variables into container Environment
+# ============================================================
+<% Object.keys(serverEnvVars).forEach(function(key) { %>
+if [ -n "${<%= key %>:-}" ]; then
+    if [ -n "${CONTAINER_ENV_JSON}" ]; then
+        CONTAINER_ENV_JSON="${CONTAINER_ENV_JSON},\"<%= key %>\":\"${<%= key %>}\""
+    else
+        CONTAINER_ENV_JSON="\"<%= key %>\":\"${<%= key %>}\""
+    fi
+fi
+<% }); %>
+<% } %>
 # Validate execution role ARN
 if [ -z "${ROLE_ARN:-}" ]; then
-    echo "❌ Execution role ARN not provided"
+    echo "❌ ROLE_ARN is not set."
+    echo "   Run 'ml-container-creator bootstrap' to configure your profile,"
+    echo "   or set ROLE_ARN as an environment variable."
     echo ""
     echo "Usage:"
     echo "  export ROLE_ARN=arn:aws:iam::ACCOUNT_ID:role/YOUR_ROLE"
     echo "  ./do/deploy"
     echo ""
-    echo "Or set ROLE_ARN in do/config"
-    echo ""
     echo "The execution role must have permissions for:"
     echo "  • SageMaker model and transform job management"
     echo "  • ECR image access"

package/templates/do/deploy.d/hyperpod-eks.ejs CHANGED Viewed

@@ -38,6 +38,10 @@ done
 # Source configuration
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 source "${SCRIPT_DIR}/config"
+source "${SCRIPT_DIR}/lib/profile.sh"
+# ── Profile-resolved variables (env var > profile > default) ──────────────────
+export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
 echo "🚀 Deploying to AWS"
 echo "   Project: ${PROJECT_NAME}"