@aws/ml-container-creator 0.10.3 → 0.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/config/parameter-schema-v2.json +28 -1
  2. package/infra/ci-harness/lib/ci-harness-stack.ts +50 -36
  3. package/package.json +6 -5
  4. package/servers/instance-sizer/index.js +30 -17
  5. package/servers/instance-sizer/lib/instance-ranker.js +44 -0
  6. package/servers/lib/catalogs/instances.json +27 -0
  7. package/src/app.js +8 -1
  8. package/src/lib/bootstrap-command-handler.js +32 -3
  9. package/src/lib/config-validator.js +1 -1
  10. package/src/lib/generated/cli-options.js +7 -2
  11. package/src/lib/generated/parameter-matrix.js +16 -5
  12. package/src/lib/generated/validation-rules.js +7 -3
  13. package/src/lib/path-prover-brain.js +58 -1
  14. package/src/lib/prompts/infrastructure-prompts.js +2 -2
  15. package/src/lib/prompts/model-prompts.js +6 -0
  16. package/src/lib/secrets-prompt-runner.js +4 -0
  17. package/src/lib/template-manager.js +1 -1
  18. package/src/lib/template-variable-resolver.js +62 -0
  19. package/templates/do/adapter +5 -0
  20. package/templates/do/build +5 -0
  21. package/templates/do/clean.d/async-inference.ejs +5 -0
  22. package/templates/do/clean.d/batch-transform.ejs +5 -0
  23. package/templates/do/clean.d/hyperpod-eks.ejs +5 -0
  24. package/templates/do/clean.d/managed-inference.ejs +5 -0
  25. package/templates/do/config +12 -45
  26. package/templates/do/deploy.d/async-inference.ejs +30 -3
  27. package/templates/do/deploy.d/batch-transform.ejs +29 -3
  28. package/templates/do/deploy.d/hyperpod-eks.ejs +4 -0
  29. package/templates/do/deploy.d/managed-inference.ejs +24 -3
  30. package/templates/do/lib/endpoint-config.sh +1 -1
  31. package/templates/do/lib/profile.sh +44 -0
  32. package/templates/do/push +5 -0
  33. package/templates/do/register +5 -0
  34. package/templates/do/stage +567 -0
  35. package/templates/do/submit +7 -0
  36. package/templates/do/test +1 -0
  37. package/templates/do/tune +4 -0
@@ -252,7 +252,7 @@ const infraInstancePrompts = [
252
252
  if (!input || input.trim() === '') {
253
253
  return 'At least one instance type is required';
254
254
  }
255
- const instancePattern = /^ml\.[a-z0-9]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
255
+ const instancePattern = /^ml\.[a-z0-9-]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
256
256
  const instances = input.split(',').map(s => s.trim()).filter(s => s.length > 0);
257
257
  if (instances.length === 0) {
258
258
  return 'At least one instance type is required';
@@ -384,7 +384,7 @@ const infraInstancePrompts = [
384
384
  if (!input || input.trim() === '') {
385
385
  return 'Instance type is required';
386
386
  }
387
- const instancePattern = /^ml\.[a-z0-9]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
387
+ const instancePattern = /^ml\.[a-z0-9-]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
388
388
  if (!instancePattern.test(input.trim())) {
389
389
  return 'Invalid instance type format. Expected format: ml.{family}.{size} (e.g., ml.m5.large, ml.g4dn.xlarge)';
390
390
  }
@@ -459,6 +459,12 @@ const hfTokenPrompts = [
459
459
  return false;
460
460
  }
461
461
 
462
+ // Skip HF token when model name is an S3 URI (no HF download needed)
463
+ const modelName = answers.customModelName || answers.modelName;
464
+ if (modelName && modelName.startsWith('s3://')) {
465
+ return false;
466
+ }
467
+
462
468
  // Display security warning before prompting
463
469
  console.log('\n🔐 HuggingFace Authentication');
464
470
  console.log(' Many models (e.g. Llama, Mistral) are gated and require a token.');
@@ -70,6 +70,10 @@ export default class SecretsPromptRunner {
70
70
  const modelSource = answers.modelSource;
71
71
  if (modelSource && modelSource !== 'huggingface') return false;
72
72
 
73
+ // Skip HF token when model name is an S3 URI (no HF download needed)
74
+ const modelName = answers.customModelName || answers.modelName;
75
+ if (modelName && modelName.startsWith('s3://')) return false;
76
+
73
77
  return true;
74
78
  }
75
79
 
@@ -146,7 +146,7 @@ export default class TemplateManager {
146
146
 
147
147
  // Validate instance type format (ml.*.*) - only for realtime-inference
148
148
  if (this.answers.instanceType && this.answers.instanceType !== 'custom') {
149
- const instancePattern = /^ml\.[a-z0-9]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
149
+ const instancePattern = /^ml\.[a-z0-9-]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
150
150
  if (!instancePattern.test(this.answers.instanceType)) {
151
151
  throw new Error(`⚠️ Invalid instance type format: ${this.answers.instanceType}. Expected format: ml.{family}.{size} (e.g., ml.m5.large, ml.g5.xlarge)`);
152
152
  }
@@ -383,6 +383,68 @@ export async function _ensureTemplateVariables(answers, registryConfigManager =
383
383
  }
384
384
  }
385
385
 
386
+ // Auto-resolve tensor parallel degree from instance catalog GPU count.
387
+ // Only applies when:
388
+ // 1. The engine supports tensor parallelism (vLLM, SGLang, TensorRT-LLM, LMI)
389
+ // 2. The instance has multiple GPUs (gpus > 1)
390
+ // 3. The user has NOT explicitly set the TP env var via --server-env or --model-env
391
+ // This ensures multi-GPU instances default to full TP utilization without requiring
392
+ // the user to manually specify TENSOR_PARALLEL_SIZE.
393
+ // Requirements: FTP-1 (extension) — task 6.2
394
+ const _TP_ENGINE_MAP = {
395
+ 'vllm': 'VLLM_TENSOR_PARALLEL_SIZE',
396
+ 'vllm-omni': 'VLLM_OMNI_TENSOR_PARALLEL_SIZE',
397
+ 'sglang': 'SGLANG_TENSOR_PARALLEL_SIZE',
398
+ 'tensorrt-llm': 'TRTLLM_TENSOR_PARALLEL_SIZE',
399
+ 'lmi': 'OPTION_TENSOR_PARALLEL_DEGREE'
400
+ };
401
+
402
+ const tpEngine = answers.backend || answers.modelServer;
403
+ const tpEnvKey = tpEngine ? _TP_ENGINE_MAP[tpEngine] : null;
404
+
405
+ if (tpEnvKey && answers.instanceType) {
406
+ // Check if user explicitly set the TP value via --server-env (un-prefixed key)
407
+ const userServerEnvVars = answers.serverEnvVars || {};
408
+ const userExplicitlySetTP = (
409
+ userServerEnvVars['TENSOR_PARALLEL_SIZE'] !== undefined ||
410
+ userServerEnvVars['TENSOR_PARALLEL_DEGREE'] !== undefined ||
411
+ userServerEnvVars[tpEnvKey] !== undefined
412
+ );
413
+
414
+ if (!userExplicitlySetTP) {
415
+ // Look up GPU count from instance catalog
416
+ let instanceGpuCount = null;
417
+ if (answers.gpuCount) {
418
+ instanceGpuCount = answers.gpuCount;
419
+ } else if (answers.icGpuCount) {
420
+ instanceGpuCount = answers.icGpuCount;
421
+ } else {
422
+ try {
423
+ const catalogPath = path.resolve(__dirname, '..', '..', 'servers', 'lib', 'catalogs', 'instances.json');
424
+ const catalogData = JSON.parse(fs.readFileSync(catalogPath, 'utf-8'));
425
+ const instanceInfo = catalogData?.catalog?.[answers.instanceType];
426
+ if (instanceInfo?.gpus && instanceInfo.gpus > 0) {
427
+ instanceGpuCount = instanceInfo.gpus;
428
+ }
429
+ } catch {
430
+ // Silently continue
431
+ }
432
+ }
433
+
434
+ // Auto-set TP to GPU count when instance has multiple GPUs
435
+ if (instanceGpuCount && instanceGpuCount > 1) {
436
+ if (!answers.envVars) {
437
+ answers.envVars = {};
438
+ }
439
+ answers.envVars[tpEnvKey] = String(instanceGpuCount);
440
+ answers.tensorParallelSize = instanceGpuCount;
441
+ answers._tpAutoResolved = true;
442
+ answers._tpAutoResolvedFrom = answers.instanceType;
443
+ console.log(` ℹ️ TP degree: ${instanceGpuCount} (auto-detected from ${answers.instanceType})`);
444
+ }
445
+ }
446
+ }
447
+
386
448
  // Determine tune support based on model presence in the tune catalog.
387
449
  // Used by the do/config template to write TUNE_SUPPORTED=true|false.
388
450
  if (answers.tuneSupported === undefined) {
@@ -18,6 +18,11 @@ set -o pipefail
18
18
  # ── Source project configuration ──────────────────────────────────────────────
19
19
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
20
20
  source "${SCRIPT_DIR}/config"
21
+ source "${SCRIPT_DIR}/lib/profile.sh"
22
+
23
+ # ── Profile-resolved variables (env var > profile > default) ──────────────────
24
+ ADAPTER_S3_BUCKET="${ADAPTER_S3_BUCKET:-mlcc-adapters-${_PROFILE[accountId]:-unknown}-${_PROFILE[awsRegion]:-us-east-1}}"
25
+
21
26
  source "${SCRIPT_DIR}/lib/wait.sh"
22
27
 
23
28
  # ── Usage ─────────────────────────────────────────────────────────────────────
@@ -9,6 +9,11 @@ set -o pipefail
9
9
  # Source configuration
10
10
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
11
11
  source "${SCRIPT_DIR}/config"
12
+ source "${SCRIPT_DIR}/lib/profile.sh"
13
+
14
+ # ── Profile-resolved variables (env var > profile > default) ──────────────────
15
+ ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
16
+ export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
12
17
 
13
18
  echo "🚀 Building Docker image for ${PROJECT_NAME}"
14
19
  echo " Deployment config: ${DEPLOYMENT_CONFIG}"
@@ -9,6 +9,11 @@ set -o pipefail
9
9
  # Source configuration
10
10
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
11
11
  source "${SCRIPT_DIR}/config"
12
+ source "${SCRIPT_DIR}/lib/profile.sh"
13
+
14
+ # ── Profile-resolved variables (env var > profile > default) ──────────────────
15
+ ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
16
+ export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
12
17
 
13
18
  # Parse arguments
14
19
  CLEANUP_TARGET=""
@@ -9,6 +9,11 @@ set -o pipefail
9
9
  # Source configuration
10
10
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
11
11
  source "${SCRIPT_DIR}/config"
12
+ source "${SCRIPT_DIR}/lib/profile.sh"
13
+
14
+ # ── Profile-resolved variables (env var > profile > default) ──────────────────
15
+ ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
16
+ export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
12
17
 
13
18
  # Parse arguments
14
19
  CLEANUP_TARGET=""
@@ -9,6 +9,11 @@ set -o pipefail
9
9
  # Source configuration
10
10
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
11
11
  source "${SCRIPT_DIR}/config"
12
+ source "${SCRIPT_DIR}/lib/profile.sh"
13
+
14
+ # ── Profile-resolved variables (env var > profile > default) ──────────────────
15
+ ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
16
+ export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
12
17
 
13
18
  # Parse arguments
14
19
  CLEANUP_TARGET=""
@@ -9,6 +9,11 @@ set -o pipefail
9
9
  # Source configuration
10
10
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
11
11
  source "${SCRIPT_DIR}/config"
12
+ source "${SCRIPT_DIR}/lib/profile.sh"
13
+
14
+ # ── Profile-resolved variables (env var > profile > default) ──────────────────
15
+ ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
16
+ export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
12
17
 
13
18
  # Parse arguments
14
19
  CLEANUP_TARGET=""
@@ -13,23 +13,23 @@ export MODEL_SERVER="<%= modelServer %>"
13
13
 
14
14
  # AWS configuration
15
15
  export AWS_REGION=${AWS_REGION:-<%= awsRegion %>}
16
- export ECR_REPOSITORY_NAME="ml-container-creator"
16
+
17
+ # ── Profile-resolved values (from ~/.ml-container-creator/config.json) ────
18
+ # ECR_REPOSITORY_NAME, ROLE_ARN, ADAPTER_S3_BUCKET — see do/lib/profile.sh
17
19
 
18
20
  <% if (typeof enableLora !== 'undefined' && enableLora) { %>
19
21
  # LoRA adapter serving
20
22
  export ENABLE_LORA=true
21
- export ADAPTER_S3_BUCKET="mlcc-adapters-$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo 'UNKNOWN')-${AWS_REGION}"
22
23
  <% } else if (framework === 'transformers' || framework === 'diffusors') { %>
23
24
  # LoRA adapter serving (uncomment to enable)
24
25
  # export ENABLE_LORA=true
25
- # export ADAPTER_S3_BUCKET="mlcc-adapters-$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo 'UNKNOWN')-${AWS_REGION}"
26
26
  <% } %>
27
27
 
28
28
  # Build configuration — WHERE the Docker image gets built
29
29
  export BUILD_TARGET="<%= buildTarget %>"
30
30
  <% if (buildTarget === 'codebuild') { %>
31
31
  export CODEBUILD_COMPUTE_TYPE="<%= codebuildComputeType %>"
32
- export CODEBUILD_PROJECT_NAME="${PROJECT_NAME}-build-$(date +%Y%m%d)"
32
+ # CODEBUILD_PROJECT_NAME — derived in do/submit at runtime
33
33
  <% } %>
34
34
 
35
35
  # Deployment configuration — WHERE the model runs
@@ -83,26 +83,9 @@ export INFERENCE_AMI_VERSION="<%= inferenceAmiVersion %>"
83
83
  <% } %>
84
84
 
85
85
  # Async-specific configuration
86
- # Resolve AWS account ID at runtime for default resource names
87
- ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo "UNKNOWN")
88
-
89
- <% if (asyncS3OutputPath) { %>
90
- export ASYNC_S3_OUTPUT_PATH="<%= asyncS3OutputPath %>"
91
- <% } else { %>
92
- export ASYNC_S3_OUTPUT_PATH="s3://mlcc-async-${ACCOUNT_ID}-${AWS_REGION}/${PROJECT_NAME}/output/"
93
- <% } %>
94
-
95
- <% if (asyncSnsSuccessTopic) { %>
96
- export ASYNC_SNS_SUCCESS_TOPIC="<%= asyncSnsSuccessTopic %>"
97
- <% } else { %>
98
- export ASYNC_SNS_SUCCESS_TOPIC="arn:aws:sns:${AWS_REGION}:${ACCOUNT_ID}:ml-container-creator-${PROJECT_NAME}-async-success"
99
- <% } %>
100
-
101
- <% if (asyncSnsErrorTopic) { %>
102
- export ASYNC_SNS_ERROR_TOPIC="<%= asyncSnsErrorTopic %>"
103
- <% } else { %>
104
- export ASYNC_SNS_ERROR_TOPIC="arn:aws:sns:${AWS_REGION}:${ACCOUNT_ID}:ml-container-creator-${PROJECT_NAME}-async-error"
105
- <% } %>
86
+ # ── Profile-resolved values (from ~/.ml-container-creator/config.json) ────
87
+ # ASYNC_S3_OUTPUT_PATH, ASYNC_SNS_SUCCESS_TOPIC, ASYNC_SNS_ERROR_TOPIC — see do/lib/profile.sh
88
+ # ACCOUNT_ID — derived inline in consuming scripts (do/deploy.d/async-inference)
106
89
 
107
90
  <% if (asyncMaxConcurrentInvocations) { %>
108
91
  export ASYNC_MAX_CONCURRENT_INVOCATIONS="<%= asyncMaxConcurrentInvocations %>"
@@ -129,19 +112,10 @@ export FSX_VOLUME_HANDLE="<%= fsxVolumeHandle %>"
129
112
  # SageMaker Batch Transform configuration
130
113
  export INSTANCE_TYPE="<%= instanceType %>"
131
114
 
132
- # Resolve AWS account ID at runtime for default resource names
133
- ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo "UNKNOWN")
115
+ # ── Profile-resolved values (from ~/.ml-container-creator/config.json) ────
116
+ # BATCH_INPUT_PATH, BATCH_OUTPUT_PATH — see do/lib/profile.sh
117
+ # ACCOUNT_ID — derived inline in consuming scripts (do/deploy.d/batch-transform)
134
118
 
135
- <% if (batchInputPath) { %>
136
- export BATCH_INPUT_PATH="<%= batchInputPath %>"
137
- <% } else { %>
138
- export BATCH_INPUT_PATH="s3://mlcc-batch-${ACCOUNT_ID}-${AWS_REGION}/${PROJECT_NAME}/input/"
139
- <% } %>
140
- <% if (batchOutputPath) { %>
141
- export BATCH_OUTPUT_PATH="<%= batchOutputPath %>"
142
- <% } else { %>
143
- export BATCH_OUTPUT_PATH="s3://mlcc-batch-${ACCOUNT_ID}-${AWS_REGION}/${PROJECT_NAME}/output/"
144
- <% } %>
145
119
  export BATCH_INSTANCE_COUNT="<%= batchInstanceCount %>"
146
120
  export BATCH_SPLIT_TYPE="<%= batchSplitType %>"
147
121
  export BATCH_STRATEGY="<%= batchStrategy %>"
@@ -256,6 +230,8 @@ export NGC_API_KEY="<%= ngcApiKey %>"
256
230
 
257
231
  <% if (deploymentTarget !== 'batch-transform') { %>
258
232
  # Managed Model Customization (do/tune)
233
+ # ── Profile-resolved values (from ~/.ml-container-creator/config.json) ────
234
+ # TUNE_S3_BUCKET — see do/lib/profile.sh
259
235
  export TUNE_SUPPORTED=<%= (typeof tuneSupported !== 'undefined' && tuneSupported) ? 'true' : 'false' %>
260
236
  <% if (typeof tuneSupported !== 'undefined' && tuneSupported) { %>
261
237
  <% if (typeof tuneModelId !== 'undefined' && tuneModelId) { %>
@@ -270,7 +246,6 @@ export TUNE_MODEL_ID="<%= tuneModelId %>"
270
246
  # export TUNE_MODEL_ID=""
271
247
  <% } %>
272
248
  <% } %>
273
- export TUNE_S3_BUCKET="mlcc-tune-$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo 'UNKNOWN')-${AWS_REGION}"
274
249
  # MLflow App ARN for experiment tracking (set by bootstrap, or override manually)
275
250
  # export MLFLOW_APP_ARN=""
276
251
  <% } %>
@@ -296,13 +271,6 @@ export MODEL_FORMAT="<%= modelFormat %>"
296
271
  # export MODEL_FORMAT=""
297
272
  <% } %>
298
273
 
299
- <% if (roleArn) { %>
300
- export ROLE_ARN="<%= roleArn %>"
301
- <% } else { %>
302
- # IAM execution role for SageMaker (uncomment to override bootstrap role)
303
- # export ROLE_ARN=""
304
- <% } %>
305
-
306
274
  <% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
307
275
  # SageMaker AI Benchmarking configuration
308
276
  export BENCHMARK_CONCURRENCY="<%= benchmarkConcurrency %>"
@@ -353,7 +321,6 @@ export BASE_IMAGE=${BASE_IMAGE:-<%= baseImage || '' %>}
353
321
  <% if ((deploymentTarget === 'realtime-inference' && !(typeof existingEndpointName !== 'undefined' && existingEndpointName)) || deploymentTarget === 'async-inference' || deploymentTarget === 'batch-transform') { %>
354
322
  export INSTANCE_TYPE=${INSTANCE_TYPE:-<%= instanceType %>}
355
323
  <% } %>
356
- export ECR_REPOSITORY_NAME=${ECR_REPOSITORY_NAME:-ml-container-creator}
357
324
 
358
325
  # Print configuration summary
359
326
  echo "⚙️ Configuration loaded"
@@ -38,6 +38,18 @@ done
38
38
  # Source configuration
39
39
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
40
40
  source "${SCRIPT_DIR}/config"
41
+ source "${SCRIPT_DIR}/lib/profile.sh"
42
+
43
+ # ── Profile-resolved variables (env var > profile > default) ──────────────────
44
+ ROLE_ARN="${ROLE_ARN:-${_PROFILE[roleArn]:-}}"
45
+ ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
46
+ export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
47
+
48
+ # Async-specific derived variables
49
+ _ASYNC_BUCKET="${_PROFILE[asyncS3Bucket]:-mlcc-async-${_PROFILE[accountId]:-unknown}-${_PROFILE[awsRegion]:-us-east-1}}"
50
+ ASYNC_S3_OUTPUT_PATH="${ASYNC_S3_OUTPUT_PATH:-s3://${_ASYNC_BUCKET}/${PROJECT_NAME}/output/}"
51
+ ASYNC_SNS_SUCCESS_TOPIC="${ASYNC_SNS_SUCCESS_TOPIC:-arn:aws:sns:${_PROFILE[awsRegion]:-us-east-1}:${_PROFILE[accountId]:-unknown}:ml-container-creator-${PROJECT_NAME}-async-success}"
52
+ ASYNC_SNS_ERROR_TOPIC="${ASYNC_SNS_ERROR_TOPIC:-arn:aws:sns:${_PROFILE[awsRegion]:-us-east-1}:${_PROFILE[accountId]:-unknown}:ml-container-creator-${PROJECT_NAME}-async-error}"
41
53
 
42
54
  echo "🚀 Deploying to AWS"
43
55
  echo " Project: ${PROJECT_NAME}"
@@ -137,16 +149,31 @@ source "${SCRIPT_DIR}/lib/wait.sh"
137
149
  # Resolve container secrets (HF_TOKEN, NGC_API_KEY)
138
150
  resolve_secrets
139
151
 
152
+ <% if (typeof serverEnvVars !== 'undefined' && serverEnvVars && Object.keys(serverEnvVars).length > 0) { %>
153
+ # ============================================================
154
+ # Inject server environment variables into container Environment
155
+ # ============================================================
156
+ <% Object.keys(serverEnvVars).forEach(function(key) { %>
157
+ if [ -n "${<%= key %>:-}" ]; then
158
+ if [ -n "${CONTAINER_ENV_JSON}" ]; then
159
+ CONTAINER_ENV_JSON="${CONTAINER_ENV_JSON},\"<%= key %>\":\"${<%= key %>}\""
160
+ else
161
+ CONTAINER_ENV_JSON="\"<%= key %>\":\"${<%= key %>}\""
162
+ fi
163
+ fi
164
+ <% }); %>
165
+ <% } %>
166
+
140
167
  # Validate execution role ARN
141
168
  if [ -z "${ROLE_ARN:-}" ]; then
142
- echo "❌ Execution role ARN not provided"
169
+ echo "❌ ROLE_ARN is not set."
170
+ echo " Run 'ml-container-creator bootstrap' to configure your profile,"
171
+ echo " or set ROLE_ARN as an environment variable."
143
172
  echo ""
144
173
  echo "Usage:"
145
174
  echo " export ROLE_ARN=arn:aws:iam::ACCOUNT_ID:role/YOUR_ROLE"
146
175
  echo " ./do/deploy"
147
176
  echo ""
148
- echo "Or set ROLE_ARN in do/config"
149
- echo ""
150
177
  echo "The execution role must have permissions for:"
151
178
  echo " • SageMaker model and endpoint management"
152
179
  echo " • ECR image access"
@@ -38,6 +38,17 @@ done
38
38
  # Source configuration
39
39
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
40
40
  source "${SCRIPT_DIR}/config"
41
+ source "${SCRIPT_DIR}/lib/profile.sh"
42
+
43
+ # ── Profile-resolved variables (env var > profile > default) ──────────────────
44
+ ROLE_ARN="${ROLE_ARN:-${_PROFILE[roleArn]:-}}"
45
+ ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
46
+ export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
47
+
48
+ # Batch-specific derived variables
49
+ _BATCH_BUCKET="${_PROFILE[batchS3Bucket]:-mlcc-batch-${_PROFILE[accountId]:-unknown}-${_PROFILE[awsRegion]:-us-east-1}}"
50
+ BATCH_INPUT_PATH="${BATCH_INPUT_PATH:-s3://${_BATCH_BUCKET}/${PROJECT_NAME}/input/}"
51
+ BATCH_OUTPUT_PATH="${BATCH_OUTPUT_PATH:-s3://${_BATCH_BUCKET}/${PROJECT_NAME}/output/}"
41
52
 
42
53
  echo "🚀 Deploying to AWS"
43
54
  echo " Project: ${PROJECT_NAME}"
@@ -135,16 +146,31 @@ source "${SCRIPT_DIR}/lib/wait.sh"
135
146
  # Resolve container secrets (HF_TOKEN, NGC_API_KEY)
136
147
  resolve_secrets
137
148
 
149
+ <% if (typeof serverEnvVars !== 'undefined' && serverEnvVars && Object.keys(serverEnvVars).length > 0) { %>
150
+ # ============================================================
151
+ # Inject server environment variables into container Environment
152
+ # ============================================================
153
+ <% Object.keys(serverEnvVars).forEach(function(key) { %>
154
+ if [ -n "${<%= key %>:-}" ]; then
155
+ if [ -n "${CONTAINER_ENV_JSON}" ]; then
156
+ CONTAINER_ENV_JSON="${CONTAINER_ENV_JSON},\"<%= key %>\":\"${<%= key %>}\""
157
+ else
158
+ CONTAINER_ENV_JSON="\"<%= key %>\":\"${<%= key %>}\""
159
+ fi
160
+ fi
161
+ <% }); %>
162
+ <% } %>
163
+
138
164
  # Validate execution role ARN
139
165
  if [ -z "${ROLE_ARN:-}" ]; then
140
- echo "❌ Execution role ARN not provided"
166
+ echo "❌ ROLE_ARN is not set."
167
+ echo " Run 'ml-container-creator bootstrap' to configure your profile,"
168
+ echo " or set ROLE_ARN as an environment variable."
141
169
  echo ""
142
170
  echo "Usage:"
143
171
  echo " export ROLE_ARN=arn:aws:iam::ACCOUNT_ID:role/YOUR_ROLE"
144
172
  echo " ./do/deploy"
145
173
  echo ""
146
- echo "Or set ROLE_ARN in do/config"
147
- echo ""
148
174
  echo "The execution role must have permissions for:"
149
175
  echo " • SageMaker model and transform job management"
150
176
  echo " • ECR image access"
@@ -38,6 +38,10 @@ done
38
38
  # Source configuration
39
39
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
40
40
  source "${SCRIPT_DIR}/config"
41
+ source "${SCRIPT_DIR}/lib/profile.sh"
42
+
43
+ # ── Profile-resolved variables (env var > profile > default) ──────────────────
44
+ export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
41
45
 
42
46
  echo "🚀 Deploying to AWS"
43
47
  echo " Project: ${PROJECT_NAME}"
@@ -211,6 +211,12 @@ _ci_handle_existing_endpoint() {
211
211
  # Source configuration
212
212
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
213
213
  source "${SCRIPT_DIR}/config"
214
+ source "${SCRIPT_DIR}/lib/profile.sh"
215
+
216
+ # ── Profile-resolved variables (env var > profile > default) ──────────────────
217
+ ROLE_ARN="${ROLE_ARN:-${_PROFILE[roleArn]:-}}"
218
+ ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
219
+ export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
214
220
 
215
221
  echo "🚀 Deploying to AWS"
216
222
  echo " Project: ${PROJECT_NAME}"
@@ -307,14 +313,14 @@ source "${SCRIPT_DIR}/lib/inference-component.sh"
307
313
 
308
314
  # Validate execution role ARN
309
315
  if [ -z "${ROLE_ARN:-}" ]; then
310
- echo "❌ Execution role ARN not provided"
316
+ echo "❌ ROLE_ARN is not set."
317
+ echo " Run 'ml-container-creator bootstrap' to configure your profile,"
318
+ echo " or set ROLE_ARN as an environment variable."
311
319
  echo ""
312
320
  echo "Usage:"
313
321
  echo " export ROLE_ARN=arn:aws:iam::ACCOUNT_ID:role/YOUR_ROLE"
314
322
  echo " ./do/deploy"
315
323
  echo ""
316
- echo "Or set ROLE_ARN in do/config"
317
- echo ""
318
324
  echo "The execution role must have permissions for:"
319
325
  echo " • SageMaker endpoint and inference component management"
320
326
  echo " • ECR image access"
@@ -350,6 +356,21 @@ fi
350
356
  # Resolve container secrets (HF_TOKEN, NGC_API_KEY)
351
357
  resolve_secrets
352
358
 
359
+ <% if (typeof serverEnvVars !== 'undefined' && serverEnvVars && Object.keys(serverEnvVars).length > 0) { %>
360
+ # ============================================================
361
+ # Inject server environment variables into container Environment
362
+ # ============================================================
363
+ <% Object.keys(serverEnvVars).forEach(function(key) { %>
364
+ if [ -n "${<%= key %>:-}" ]; then
365
+ if [ -n "${CONTAINER_ENV_JSON}" ]; then
366
+ CONTAINER_ENV_JSON="${CONTAINER_ENV_JSON},\"<%= key %>\":\"${<%= key %>}\""
367
+ else
368
+ CONTAINER_ENV_JSON="\"<%= key %>\":\"${<%= key %>}\""
369
+ fi
370
+ fi
371
+ <% }); %>
372
+ <% } %>
373
+
353
374
  # ============================================================
354
375
  # CI-Mode: Idempotent deployment check (runs before normal idempotency)
355
376
  # ============================================================
@@ -165,7 +165,7 @@ create_endpoint_config() {
165
165
  # Optional: capacity reservation
166
166
  if [ -n "${CAPACITY_RESERVATION_ARN:-}" ]; then
167
167
  variant_json="${variant_json},\"CapacityReservationConfig\":{\"CapacityReservationPreference\":\"capacity-reservations-only\",\"MlReservationArn\":\"${CAPACITY_RESERVATION_ARN}\"}"
168
- echo " ⚠️ Capacity reservation (experimental): ${CAPACITY_RESERVATION_ARN}"
168
+ echo " ✓ Capacity reservation: ${CAPACITY_RESERVATION_ARN}"
169
169
  fi
170
170
 
171
171
  variant_json="${variant_json}}]"
@@ -0,0 +1,44 @@
1
+ #!/usr/bin/env bash
2
+ # Profile loader — reads active bootstrap profile into _PROFILE[] associative array.
3
+ # Source this file after do/config. Values provide defaults; explicit env vars take precedence.
4
+ #
5
+ # Requires bash 4+ for associative array support.
6
+ # macOS ships with bash 3.2 — install bash 4+ via Homebrew: brew install bash
7
+ #
8
+ # Expected keys in _PROFILE:
9
+ # awsRegion, accountId, awsProfile, roleArn, ecrRepositoryName,
10
+ # benchmarkS3Bucket, ciBenchmarkResultsBucket, asyncS3Bucket, batchS3Bucket,
11
+ # ciTableName, ciInfraProvisioned
12
+
13
+ # Temporarily disable unbound variable checking for profile loading
14
+ # (keys may not exist in the profile config, and declare -A behavior
15
+ # varies across bash versions with set -u)
16
+ set +u 2>/dev/null || true
17
+
18
+ declare -A _PROFILE 2>/dev/null || true
19
+ if command -v python3 &>/dev/null; then
20
+ _PROFILE_RAW=$(python3 -c "
21
+ import json, os
22
+ try:
23
+ with open(os.path.expanduser('~/.ml-container-creator/config.json')) as f:
24
+ c = json.load(f)
25
+ p = c['profiles'][c['activeProfile']]
26
+ # Output as KEY=VALUE lines (simple, no JSON parsing in bash)
27
+ for k, v in p.items():
28
+ if isinstance(v, (str, int, float, bool)):
29
+ print(f'{k}={v}')
30
+ except:
31
+ pass
32
+ " 2>/dev/null) || _PROFILE_RAW=""
33
+
34
+ if [ -n "${_PROFILE_RAW}" ]; then
35
+ while IFS='=' read -r key value; do
36
+ [ -n "${key}" ] && _PROFILE["${key}"]="${value}"
37
+ done <<< "${_PROFILE_RAW}"
38
+ fi
39
+ fi
40
+
41
+ # NOTE: set -u is NOT re-enabled here. The caller is responsible for managing
42
+ # their own shell options. Re-enabling set -u would cause "unbound variable"
43
+ # errors when accessing _PROFILE keys on bash versions where empty associative
44
+ # arrays are treated as unset (bash 5.x on some platforms).
package/templates/do/push CHANGED
@@ -9,6 +9,11 @@ set -o pipefail
9
9
  # Source configuration
10
10
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
11
11
  source "${SCRIPT_DIR}/config"
12
+ source "${SCRIPT_DIR}/lib/profile.sh"
13
+
14
+ # ── Profile-resolved variables (env var > profile > default) ──────────────────
15
+ ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
16
+ export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
12
17
 
13
18
  echo "🚀 Pushing Docker image to Amazon ECR"
14
19
  echo " Project: ${PROJECT_NAME}"
@@ -9,6 +9,11 @@ set -o pipefail
9
9
  # Source configuration
10
10
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
11
11
  source "${SCRIPT_DIR}/config"
12
+ source "${SCRIPT_DIR}/lib/profile.sh"
13
+
14
+ # ── Profile-resolved variables (env var > profile > default) ──────────────────
15
+ ROLE_ARN="${ROLE_ARN:-${_PROFILE[roleArn]:-}}"
16
+ ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
12
17
 
13
18
  # ============================================================
14
19
  # Register deployment to the deployment registry