npm - @aws/ml-container-creator - Versions diffs - 0.10.3 → 0.12.1 - Mend

@aws/ml-container-creator 0.10.3 → 0.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37)

package/config/parameter-schema-v2.json +28 -1
package/infra/ci-harness/lib/ci-harness-stack.ts +50 -36
package/package.json +6 -5
package/servers/instance-sizer/index.js +30 -17
package/servers/instance-sizer/lib/instance-ranker.js +44 -0
package/servers/lib/catalogs/instances.json +27 -0
package/src/app.js +8 -1
package/src/lib/bootstrap-command-handler.js +32 -3
package/src/lib/config-validator.js +1 -1
package/src/lib/generated/cli-options.js +7 -2
package/src/lib/generated/parameter-matrix.js +16 -5
package/src/lib/generated/validation-rules.js +7 -3
package/src/lib/path-prover-brain.js +58 -1
package/src/lib/prompts/infrastructure-prompts.js +2 -2
package/src/lib/prompts/model-prompts.js +6 -0
package/src/lib/secrets-prompt-runner.js +4 -0
package/src/lib/template-manager.js +1 -1
package/src/lib/template-variable-resolver.js +62 -0
package/templates/do/adapter +5 -0
package/templates/do/build +5 -0
package/templates/do/clean.d/async-inference.ejs +5 -0
package/templates/do/clean.d/batch-transform.ejs +5 -0
package/templates/do/clean.d/hyperpod-eks.ejs +5 -0
package/templates/do/clean.d/managed-inference.ejs +5 -0
package/templates/do/config +12 -45
package/templates/do/deploy.d/async-inference.ejs +30 -3
package/templates/do/deploy.d/batch-transform.ejs +29 -3
package/templates/do/deploy.d/hyperpod-eks.ejs +4 -0
package/templates/do/deploy.d/managed-inference.ejs +24 -3
package/templates/do/lib/endpoint-config.sh +1 -1
package/templates/do/lib/profile.sh +44 -0
package/templates/do/push +5 -0
package/templates/do/register +5 -0
package/templates/do/stage +567 -0
package/templates/do/submit +7 -0
package/templates/do/test +1 -0
package/templates/do/tune +4 -0

package/src/lib/prompts/infrastructure-prompts.js CHANGED Viewed

@@ -252,7 +252,7 @@ const infraInstancePrompts = [
             if (!input || input.trim() === '') {
                 return 'At least one instance type is required';
             }
-            const instancePattern = /^ml\.[a-z0-9]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
+            const instancePattern = /^ml\.[a-z0-9-]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
             const instances = input.split(',').map(s => s.trim()).filter(s => s.length > 0);
             if (instances.length === 0) {
                 return 'At least one instance type is required';
@@ -384,7 +384,7 @@ const infraInstancePrompts = [
             if (!input || input.trim() === '') {
                 return 'Instance type is required';
             }
-            const instancePattern = /^ml\.[a-z0-9]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
+            const instancePattern = /^ml\.[a-z0-9-]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
             if (!instancePattern.test(input.trim())) {
                 return 'Invalid instance type format. Expected format: ml.{family}.{size} (e.g., ml.m5.large, ml.g4dn.xlarge)';
             }

package/src/lib/prompts/model-prompts.js CHANGED Viewed

@@ -459,6 +459,12 @@ const hfTokenPrompts = [
                 return false;
             }
+            // Skip HF token when model name is an S3 URI (no HF download needed)
+            const modelName = answers.customModelName || answers.modelName;
+            if (modelName && modelName.startsWith('s3://')) {
+                return false;
+            }
             // Display security warning before prompting
             console.log('\n🔐 HuggingFace Authentication');
             console.log('   Many models (e.g. Llama, Mistral) are gated and require a token.');

package/src/lib/secrets-prompt-runner.js CHANGED Viewed

@@ -70,6 +70,10 @@ export default class SecretsPromptRunner {
             const modelSource = answers.modelSource;
             if (modelSource && modelSource !== 'huggingface') return false;
+            // Skip HF token when model name is an S3 URI (no HF download needed)
+            const modelName = answers.customModelName || answers.modelName;
+            if (modelName && modelName.startsWith('s3://')) return false;
             return true;
         }

package/src/lib/template-manager.js CHANGED Viewed

@@ -146,7 +146,7 @@ export default class TemplateManager {
         // Validate instance type format (ml.*.*) - only for realtime-inference
         if (this.answers.instanceType && this.answers.instanceType !== 'custom') {
-            const instancePattern = /^ml\.[a-z0-9]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
+            const instancePattern = /^ml\.[a-z0-9-]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
             if (!instancePattern.test(this.answers.instanceType)) {
                 throw new Error(`⚠️  Invalid instance type format: ${this.answers.instanceType}. Expected format: ml.{family}.{size} (e.g., ml.m5.large, ml.g5.xlarge)`);
             }

package/src/lib/template-variable-resolver.js CHANGED Viewed

@@ -383,6 +383,68 @@ export async function _ensureTemplateVariables(answers, registryConfigManager =
         }
     }
+    // Auto-resolve tensor parallel degree from instance catalog GPU count.
+    // Only applies when:
+    //   1. The engine supports tensor parallelism (vLLM, SGLang, TensorRT-LLM, LMI)
+    //   2. The instance has multiple GPUs (gpus > 1)
+    //   3. The user has NOT explicitly set the TP env var via --server-env or --model-env
+    // This ensures multi-GPU instances default to full TP utilization without requiring
+    // the user to manually specify TENSOR_PARALLEL_SIZE.
+    // Requirements: FTP-1 (extension) — task 6.2
+    const _TP_ENGINE_MAP = {
+        'vllm': 'VLLM_TENSOR_PARALLEL_SIZE',
+        'vllm-omni': 'VLLM_OMNI_TENSOR_PARALLEL_SIZE',
+        'sglang': 'SGLANG_TENSOR_PARALLEL_SIZE',
+        'tensorrt-llm': 'TRTLLM_TENSOR_PARALLEL_SIZE',
+        'lmi': 'OPTION_TENSOR_PARALLEL_DEGREE'
+    };
+    const tpEngine = answers.backend || answers.modelServer;
+    const tpEnvKey = tpEngine ? _TP_ENGINE_MAP[tpEngine] : null;
+    if (tpEnvKey && answers.instanceType) {
+        // Check if user explicitly set the TP value via --server-env (un-prefixed key)
+        const userServerEnvVars = answers.serverEnvVars || {};
+        const userExplicitlySetTP = (
+            userServerEnvVars['TENSOR_PARALLEL_SIZE'] !== undefined ||
+            userServerEnvVars['TENSOR_PARALLEL_DEGREE'] !== undefined ||
+            userServerEnvVars[tpEnvKey] !== undefined
+        );
+        if (!userExplicitlySetTP) {
+            // Look up GPU count from instance catalog
+            let instanceGpuCount = null;
+            if (answers.gpuCount) {
+                instanceGpuCount = answers.gpuCount;
+            } else if (answers.icGpuCount) {
+                instanceGpuCount = answers.icGpuCount;
+            } else {
+                try {
+                    const catalogPath = path.resolve(__dirname, '..', '..', 'servers', 'lib', 'catalogs', 'instances.json');
+                    const catalogData = JSON.parse(fs.readFileSync(catalogPath, 'utf-8'));
+                    const instanceInfo = catalogData?.catalog?.[answers.instanceType];
+                    if (instanceInfo?.gpus && instanceInfo.gpus > 0) {
+                        instanceGpuCount = instanceInfo.gpus;
+                    }
+                } catch {
+                    // Silently continue
+                }
+            }
+            // Auto-set TP to GPU count when instance has multiple GPUs
+            if (instanceGpuCount && instanceGpuCount > 1) {
+                if (!answers.envVars) {
+                    answers.envVars = {};
+                }
+                answers.envVars[tpEnvKey] = String(instanceGpuCount);
+                answers.tensorParallelSize = instanceGpuCount;
+                answers._tpAutoResolved = true;
+                answers._tpAutoResolvedFrom = answers.instanceType;
+                console.log(`    ℹ️  TP degree: ${instanceGpuCount} (auto-detected from ${answers.instanceType})`);
+            }
+        }
+    }
     // Determine tune support based on model presence in the tune catalog.
     // Used by the do/config template to write TUNE_SUPPORTED=true|false.
     if (answers.tuneSupported === undefined) {

package/templates/do/adapter CHANGED Viewed

@@ -18,6 +18,11 @@ set -o pipefail
 # ── Source project configuration ──────────────────────────────────────────────
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 source "${SCRIPT_DIR}/config"
+source "${SCRIPT_DIR}/lib/profile.sh"
+# ── Profile-resolved variables (env var > profile > default) ──────────────────
+ADAPTER_S3_BUCKET="${ADAPTER_S3_BUCKET:-mlcc-adapters-${_PROFILE[accountId]:-unknown}-${_PROFILE[awsRegion]:-us-east-1}}"
 source "${SCRIPT_DIR}/lib/wait.sh"
 # ── Usage ─────────────────────────────────────────────────────────────────────

package/templates/do/build CHANGED Viewed

@@ -9,6 +9,11 @@ set -o pipefail
 # Source configuration
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 source "${SCRIPT_DIR}/config"
+source "${SCRIPT_DIR}/lib/profile.sh"
+# ── Profile-resolved variables (env var > profile > default) ──────────────────
+ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
+export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
 echo "🚀 Building Docker image for ${PROJECT_NAME}"
 echo "   Deployment config: ${DEPLOYMENT_CONFIG}"

package/templates/do/clean.d/async-inference.ejs CHANGED Viewed

@@ -9,6 +9,11 @@ set -o pipefail
 # Source configuration
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 source "${SCRIPT_DIR}/config"
+source "${SCRIPT_DIR}/lib/profile.sh"
+# ── Profile-resolved variables (env var > profile > default) ──────────────────
+ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
+export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
 # Parse arguments
 CLEANUP_TARGET=""

package/templates/do/clean.d/batch-transform.ejs CHANGED Viewed

@@ -9,6 +9,11 @@ set -o pipefail
 # Source configuration
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 source "${SCRIPT_DIR}/config"
+source "${SCRIPT_DIR}/lib/profile.sh"
+# ── Profile-resolved variables (env var > profile > default) ──────────────────
+ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
+export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
 # Parse arguments
 CLEANUP_TARGET=""

package/templates/do/clean.d/hyperpod-eks.ejs CHANGED Viewed

@@ -9,6 +9,11 @@ set -o pipefail
 # Source configuration
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 source "${SCRIPT_DIR}/config"
+source "${SCRIPT_DIR}/lib/profile.sh"
+# ── Profile-resolved variables (env var > profile > default) ──────────────────
+ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
+export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
 # Parse arguments
 CLEANUP_TARGET=""

package/templates/do/clean.d/managed-inference.ejs CHANGED Viewed

@@ -9,6 +9,11 @@ set -o pipefail
 # Source configuration
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 source "${SCRIPT_DIR}/config"
+source "${SCRIPT_DIR}/lib/profile.sh"
+# ── Profile-resolved variables (env var > profile > default) ──────────────────
+ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
+export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
 # Parse arguments
 CLEANUP_TARGET=""

package/templates/do/config CHANGED Viewed

@@ -13,23 +13,23 @@ export MODEL_SERVER="<%= modelServer %>"
 # AWS configuration
 export AWS_REGION=${AWS_REGION:-<%= awsRegion %>}
-export ECR_REPOSITORY_NAME="ml-container-creator"
+# ── Profile-resolved values (from ~/.ml-container-creator/config.json) ────
+# ECR_REPOSITORY_NAME, ROLE_ARN, ADAPTER_S3_BUCKET — see do/lib/profile.sh
 <% if (typeof enableLora !== 'undefined' && enableLora) { %>
 # LoRA adapter serving
 export ENABLE_LORA=true
-export ADAPTER_S3_BUCKET="mlcc-adapters-$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo 'UNKNOWN')-${AWS_REGION}"
 <% } else if (framework === 'transformers' || framework === 'diffusors') { %>
 # LoRA adapter serving (uncomment to enable)
 # export ENABLE_LORA=true
-# export ADAPTER_S3_BUCKET="mlcc-adapters-$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo 'UNKNOWN')-${AWS_REGION}"
 <% } %>
 # Build configuration — WHERE the Docker image gets built
 export BUILD_TARGET="<%= buildTarget %>"
 <% if (buildTarget === 'codebuild') { %>
 export CODEBUILD_COMPUTE_TYPE="<%= codebuildComputeType %>"
-export CODEBUILD_PROJECT_NAME="${PROJECT_NAME}-build-$(date +%Y%m%d)"
+# CODEBUILD_PROJECT_NAME — derived in do/submit at runtime
 <% } %>
 # Deployment configuration — WHERE the model runs
@@ -83,26 +83,9 @@ export INFERENCE_AMI_VERSION="<%= inferenceAmiVersion %>"
 <% } %>
 # Async-specific configuration
-# Resolve AWS account ID at runtime for default resource names
-ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo "UNKNOWN")
-<% if (asyncS3OutputPath) { %>
-export ASYNC_S3_OUTPUT_PATH="<%= asyncS3OutputPath %>"
-<% } else { %>
-export ASYNC_S3_OUTPUT_PATH="s3://mlcc-async-${ACCOUNT_ID}-${AWS_REGION}/${PROJECT_NAME}/output/"
-<% } %>
-<% if (asyncSnsSuccessTopic) { %>
-export ASYNC_SNS_SUCCESS_TOPIC="<%= asyncSnsSuccessTopic %>"
-<% } else { %>
-export ASYNC_SNS_SUCCESS_TOPIC="arn:aws:sns:${AWS_REGION}:${ACCOUNT_ID}:ml-container-creator-${PROJECT_NAME}-async-success"
-<% } %>
-<% if (asyncSnsErrorTopic) { %>
-export ASYNC_SNS_ERROR_TOPIC="<%= asyncSnsErrorTopic %>"
-<% } else { %>
-export ASYNC_SNS_ERROR_TOPIC="arn:aws:sns:${AWS_REGION}:${ACCOUNT_ID}:ml-container-creator-${PROJECT_NAME}-async-error"
-<% } %>
+# ── Profile-resolved values (from ~/.ml-container-creator/config.json) ────
+# ASYNC_S3_OUTPUT_PATH, ASYNC_SNS_SUCCESS_TOPIC, ASYNC_SNS_ERROR_TOPIC — see do/lib/profile.sh
+# ACCOUNT_ID — derived inline in consuming scripts (do/deploy.d/async-inference)
 <% if (asyncMaxConcurrentInvocations) { %>
 export ASYNC_MAX_CONCURRENT_INVOCATIONS="<%= asyncMaxConcurrentInvocations %>"
@@ -129,19 +112,10 @@ export FSX_VOLUME_HANDLE="<%= fsxVolumeHandle %>"
 # SageMaker Batch Transform configuration
 export INSTANCE_TYPE="<%= instanceType %>"
-# Resolve AWS account ID at runtime for default resource names
-ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo "UNKNOWN")
+# ── Profile-resolved values (from ~/.ml-container-creator/config.json) ────
+# BATCH_INPUT_PATH, BATCH_OUTPUT_PATH — see do/lib/profile.sh
+# ACCOUNT_ID — derived inline in consuming scripts (do/deploy.d/batch-transform)
-<% if (batchInputPath) { %>
-export BATCH_INPUT_PATH="<%= batchInputPath %>"
-<% } else { %>
-export BATCH_INPUT_PATH="s3://mlcc-batch-${ACCOUNT_ID}-${AWS_REGION}/${PROJECT_NAME}/input/"
-<% } %>
-<% if (batchOutputPath) { %>
-export BATCH_OUTPUT_PATH="<%= batchOutputPath %>"
-<% } else { %>
-export BATCH_OUTPUT_PATH="s3://mlcc-batch-${ACCOUNT_ID}-${AWS_REGION}/${PROJECT_NAME}/output/"
-<% } %>
 export BATCH_INSTANCE_COUNT="<%= batchInstanceCount %>"
 export BATCH_SPLIT_TYPE="<%= batchSplitType %>"
 export BATCH_STRATEGY="<%= batchStrategy %>"
@@ -256,6 +230,8 @@ export NGC_API_KEY="<%= ngcApiKey %>"
 <% if (deploymentTarget !== 'batch-transform') { %>
 # Managed Model Customization (do/tune)
+# ── Profile-resolved values (from ~/.ml-container-creator/config.json) ────
+# TUNE_S3_BUCKET — see do/lib/profile.sh
 export TUNE_SUPPORTED=<%= (typeof tuneSupported !== 'undefined' && tuneSupported) ? 'true' : 'false' %>
 <% if (typeof tuneSupported !== 'undefined' && tuneSupported) { %>
 <% if (typeof tuneModelId !== 'undefined' && tuneModelId) { %>
@@ -270,7 +246,6 @@ export TUNE_MODEL_ID="<%= tuneModelId %>"
 # export TUNE_MODEL_ID=""
 <% } %>
 <% } %>
-export TUNE_S3_BUCKET="mlcc-tune-$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo 'UNKNOWN')-${AWS_REGION}"
 # MLflow App ARN for experiment tracking (set by bootstrap, or override manually)
 # export MLFLOW_APP_ARN=""
 <% } %>
@@ -296,13 +271,6 @@ export MODEL_FORMAT="<%= modelFormat %>"
 # export MODEL_FORMAT=""
 <% } %>
-<% if (roleArn) { %>
-export ROLE_ARN="<%= roleArn %>"
-<% } else { %>
-# IAM execution role for SageMaker (uncomment to override bootstrap role)
-# export ROLE_ARN=""
-<% } %>
 <% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
 # SageMaker AI Benchmarking configuration
 export BENCHMARK_CONCURRENCY="<%= benchmarkConcurrency %>"
@@ -353,7 +321,6 @@ export BASE_IMAGE=${BASE_IMAGE:-<%= baseImage || '' %>}
 <% if ((deploymentTarget === 'realtime-inference' && !(typeof existingEndpointName !== 'undefined' && existingEndpointName)) || deploymentTarget === 'async-inference' || deploymentTarget === 'batch-transform') { %>
 export INSTANCE_TYPE=${INSTANCE_TYPE:-<%= instanceType %>}
 <% } %>
-export ECR_REPOSITORY_NAME=${ECR_REPOSITORY_NAME:-ml-container-creator}
 # Print configuration summary
 echo "⚙️  Configuration loaded"

package/templates/do/deploy.d/async-inference.ejs CHANGED Viewed

@@ -38,6 +38,18 @@ done
 # Source configuration
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 source "${SCRIPT_DIR}/config"
+source "${SCRIPT_DIR}/lib/profile.sh"
+# ── Profile-resolved variables (env var > profile > default) ──────────────────
+ROLE_ARN="${ROLE_ARN:-${_PROFILE[roleArn]:-}}"
+ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
+export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
+# Async-specific derived variables
+_ASYNC_BUCKET="${_PROFILE[asyncS3Bucket]:-mlcc-async-${_PROFILE[accountId]:-unknown}-${_PROFILE[awsRegion]:-us-east-1}}"
+ASYNC_S3_OUTPUT_PATH="${ASYNC_S3_OUTPUT_PATH:-s3://${_ASYNC_BUCKET}/${PROJECT_NAME}/output/}"
+ASYNC_SNS_SUCCESS_TOPIC="${ASYNC_SNS_SUCCESS_TOPIC:-arn:aws:sns:${_PROFILE[awsRegion]:-us-east-1}:${_PROFILE[accountId]:-unknown}:ml-container-creator-${PROJECT_NAME}-async-success}"
+ASYNC_SNS_ERROR_TOPIC="${ASYNC_SNS_ERROR_TOPIC:-arn:aws:sns:${_PROFILE[awsRegion]:-us-east-1}:${_PROFILE[accountId]:-unknown}:ml-container-creator-${PROJECT_NAME}-async-error}"
 echo "🚀 Deploying to AWS"
 echo "   Project: ${PROJECT_NAME}"
@@ -137,16 +149,31 @@ source "${SCRIPT_DIR}/lib/wait.sh"
 # Resolve container secrets (HF_TOKEN, NGC_API_KEY)
 resolve_secrets
+<% if (typeof serverEnvVars !== 'undefined' && serverEnvVars && Object.keys(serverEnvVars).length > 0) { %>
+# ============================================================
+# Inject server environment variables into container Environment
+# ============================================================
+<% Object.keys(serverEnvVars).forEach(function(key) { %>
+if [ -n "${<%= key %>:-}" ]; then
+    if [ -n "${CONTAINER_ENV_JSON}" ]; then
+        CONTAINER_ENV_JSON="${CONTAINER_ENV_JSON},\"<%= key %>\":\"${<%= key %>}\""
+    else
+        CONTAINER_ENV_JSON="\"<%= key %>\":\"${<%= key %>}\""
+    fi
+fi
+<% }); %>
+<% } %>
 # Validate execution role ARN
 if [ -z "${ROLE_ARN:-}" ]; then
-    echo "❌ Execution role ARN not provided"
+    echo "❌ ROLE_ARN is not set."
+    echo "   Run 'ml-container-creator bootstrap' to configure your profile,"
+    echo "   or set ROLE_ARN as an environment variable."
     echo ""
     echo "Usage:"
     echo "  export ROLE_ARN=arn:aws:iam::ACCOUNT_ID:role/YOUR_ROLE"
     echo "  ./do/deploy"
     echo ""
-    echo "Or set ROLE_ARN in do/config"
-    echo ""
     echo "The execution role must have permissions for:"
     echo "  • SageMaker model and endpoint management"
     echo "  • ECR image access"

package/templates/do/deploy.d/batch-transform.ejs CHANGED Viewed

@@ -38,6 +38,17 @@ done
 # Source configuration
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 source "${SCRIPT_DIR}/config"
+source "${SCRIPT_DIR}/lib/profile.sh"
+# ── Profile-resolved variables (env var > profile > default) ──────────────────
+ROLE_ARN="${ROLE_ARN:-${_PROFILE[roleArn]:-}}"
+ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
+export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
+# Batch-specific derived variables
+_BATCH_BUCKET="${_PROFILE[batchS3Bucket]:-mlcc-batch-${_PROFILE[accountId]:-unknown}-${_PROFILE[awsRegion]:-us-east-1}}"
+BATCH_INPUT_PATH="${BATCH_INPUT_PATH:-s3://${_BATCH_BUCKET}/${PROJECT_NAME}/input/}"
+BATCH_OUTPUT_PATH="${BATCH_OUTPUT_PATH:-s3://${_BATCH_BUCKET}/${PROJECT_NAME}/output/}"
 echo "🚀 Deploying to AWS"
 echo "   Project: ${PROJECT_NAME}"
@@ -135,16 +146,31 @@ source "${SCRIPT_DIR}/lib/wait.sh"
 # Resolve container secrets (HF_TOKEN, NGC_API_KEY)
 resolve_secrets
+<% if (typeof serverEnvVars !== 'undefined' && serverEnvVars && Object.keys(serverEnvVars).length > 0) { %>
+# ============================================================
+# Inject server environment variables into container Environment
+# ============================================================
+<% Object.keys(serverEnvVars).forEach(function(key) { %>
+if [ -n "${<%= key %>:-}" ]; then
+    if [ -n "${CONTAINER_ENV_JSON}" ]; then
+        CONTAINER_ENV_JSON="${CONTAINER_ENV_JSON},\"<%= key %>\":\"${<%= key %>}\""
+    else
+        CONTAINER_ENV_JSON="\"<%= key %>\":\"${<%= key %>}\""
+    fi
+fi
+<% }); %>
+<% } %>
 # Validate execution role ARN
 if [ -z "${ROLE_ARN:-}" ]; then
-    echo "❌ Execution role ARN not provided"
+    echo "❌ ROLE_ARN is not set."
+    echo "   Run 'ml-container-creator bootstrap' to configure your profile,"
+    echo "   or set ROLE_ARN as an environment variable."
     echo ""
     echo "Usage:"
     echo "  export ROLE_ARN=arn:aws:iam::ACCOUNT_ID:role/YOUR_ROLE"
     echo "  ./do/deploy"
     echo ""
-    echo "Or set ROLE_ARN in do/config"
-    echo ""
     echo "The execution role must have permissions for:"
     echo "  • SageMaker model and transform job management"
     echo "  • ECR image access"

package/templates/do/deploy.d/hyperpod-eks.ejs CHANGED Viewed

@@ -38,6 +38,10 @@ done
 # Source configuration
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 source "${SCRIPT_DIR}/config"
+source "${SCRIPT_DIR}/lib/profile.sh"
+# ── Profile-resolved variables (env var > profile > default) ──────────────────
+export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
 echo "🚀 Deploying to AWS"
 echo "   Project: ${PROJECT_NAME}"

package/templates/do/deploy.d/managed-inference.ejs CHANGED Viewed

@@ -211,6 +211,12 @@ _ci_handle_existing_endpoint() {
 # Source configuration
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 source "${SCRIPT_DIR}/config"
+source "${SCRIPT_DIR}/lib/profile.sh"
+# ── Profile-resolved variables (env var > profile > default) ──────────────────
+ROLE_ARN="${ROLE_ARN:-${_PROFILE[roleArn]:-}}"
+ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
+export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
 echo "🚀 Deploying to AWS"
 echo "   Project: ${PROJECT_NAME}"
@@ -307,14 +313,14 @@ source "${SCRIPT_DIR}/lib/inference-component.sh"
 # Validate execution role ARN
 if [ -z "${ROLE_ARN:-}" ]; then
-    echo "❌ Execution role ARN not provided"
+    echo "❌ ROLE_ARN is not set."
+    echo "   Run 'ml-container-creator bootstrap' to configure your profile,"
+    echo "   or set ROLE_ARN as an environment variable."
     echo ""
     echo "Usage:"
     echo "  export ROLE_ARN=arn:aws:iam::ACCOUNT_ID:role/YOUR_ROLE"
     echo "  ./do/deploy"
     echo ""
-    echo "Or set ROLE_ARN in do/config"
-    echo ""
     echo "The execution role must have permissions for:"
     echo "  • SageMaker endpoint and inference component management"
     echo "  • ECR image access"
@@ -350,6 +356,21 @@ fi
 # Resolve container secrets (HF_TOKEN, NGC_API_KEY)
 resolve_secrets
+<% if (typeof serverEnvVars !== 'undefined' && serverEnvVars && Object.keys(serverEnvVars).length > 0) { %>
+# ============================================================
+# Inject server environment variables into container Environment
+# ============================================================
+<% Object.keys(serverEnvVars).forEach(function(key) { %>
+if [ -n "${<%= key %>:-}" ]; then
+    if [ -n "${CONTAINER_ENV_JSON}" ]; then
+        CONTAINER_ENV_JSON="${CONTAINER_ENV_JSON},\"<%= key %>\":\"${<%= key %>}\""
+    else
+        CONTAINER_ENV_JSON="\"<%= key %>\":\"${<%= key %>}\""
+    fi
+fi
+<% }); %>
+<% } %>
 # ============================================================
 # CI-Mode: Idempotent deployment check (runs before normal idempotency)
 # ============================================================

package/templates/do/lib/endpoint-config.sh CHANGED Viewed

@@ -165,7 +165,7 @@ create_endpoint_config() {
         # Optional: capacity reservation
         if [ -n "${CAPACITY_RESERVATION_ARN:-}" ]; then
             variant_json="${variant_json},\"CapacityReservationConfig\":{\"CapacityReservationPreference\":\"capacity-reservations-only\",\"MlReservationArn\":\"${CAPACITY_RESERVATION_ARN}\"}"
-            echo "   ⚠️  Capacity reservation (experimental): ${CAPACITY_RESERVATION_ARN}"
+            echo "   ✓ Capacity reservation: ${CAPACITY_RESERVATION_ARN}"
         fi
         variant_json="${variant_json}}]"

package/templates/do/lib/profile.sh ADDED Viewed

@@ -0,0 +1,44 @@
+#!/usr/bin/env bash
+# Profile loader — reads active bootstrap profile into _PROFILE[] associative array.
+# Source this file after do/config. Values provide defaults; explicit env vars take precedence.
+#
+# Requires bash 4+ for associative array support.
+# macOS ships with bash 3.2 — install bash 4+ via Homebrew: brew install bash
+#
+# Expected keys in _PROFILE:
+#   awsRegion, accountId, awsProfile, roleArn, ecrRepositoryName,
+#   benchmarkS3Bucket, ciBenchmarkResultsBucket, asyncS3Bucket, batchS3Bucket,
+#   ciTableName, ciInfraProvisioned
+# Temporarily disable unbound variable checking for profile loading
+# (keys may not exist in the profile config, and declare -A behavior
+# varies across bash versions with set -u)
+set +u 2>/dev/null || true
+declare -A _PROFILE 2>/dev/null || true
+if command -v python3 &>/dev/null; then
+    _PROFILE_RAW=$(python3 -c "
+import json, os
+try:
+    with open(os.path.expanduser('~/.ml-container-creator/config.json')) as f:
+        c = json.load(f)
+    p = c['profiles'][c['activeProfile']]
+    # Output as KEY=VALUE lines (simple, no JSON parsing in bash)
+    for k, v in p.items():
+        if isinstance(v, (str, int, float, bool)):
+            print(f'{k}={v}')
+except:
+    pass
+" 2>/dev/null) || _PROFILE_RAW=""
+    if [ -n "${_PROFILE_RAW}" ]; then
+        while IFS='=' read -r key value; do
+            [ -n "${key}" ] && _PROFILE["${key}"]="${value}"
+        done <<< "${_PROFILE_RAW}"
+    fi
+fi
+# NOTE: set -u is NOT re-enabled here. The caller is responsible for managing
+# their own shell options. Re-enabling set -u would cause "unbound variable"
+# errors when accessing _PROFILE keys on bash versions where empty associative
+# arrays are treated as unset (bash 5.x on some platforms).

package/templates/do/push CHANGED Viewed

@@ -9,6 +9,11 @@ set -o pipefail
 # Source configuration
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 source "${SCRIPT_DIR}/config"
+source "${SCRIPT_DIR}/lib/profile.sh"
+# ── Profile-resolved variables (env var > profile > default) ──────────────────
+ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
+export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
 echo "🚀 Pushing Docker image to Amazon ECR"
 echo "   Project: ${PROJECT_NAME}"

package/templates/do/register CHANGED Viewed

@@ -9,6 +9,11 @@ set -o pipefail
 # Source configuration
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 source "${SCRIPT_DIR}/config"
+source "${SCRIPT_DIR}/lib/profile.sh"
+# ── Profile-resolved variables (env var > profile > default) ──────────────────
+ROLE_ARN="${ROLE_ARN:-${_PROFILE[roleArn]:-}}"
+ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
 # ============================================================
 # Register deployment to the deployment registry