@aws/ml-container-creator 0.10.3 → 0.12.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/parameter-schema-v2.json +28 -1
- package/infra/ci-harness/lib/ci-harness-stack.ts +50 -36
- package/package.json +6 -5
- package/servers/instance-sizer/index.js +30 -17
- package/servers/instance-sizer/lib/instance-ranker.js +44 -0
- package/servers/lib/catalogs/instances.json +27 -0
- package/src/app.js +8 -1
- package/src/lib/bootstrap-command-handler.js +32 -3
- package/src/lib/config-validator.js +1 -1
- package/src/lib/generated/cli-options.js +7 -2
- package/src/lib/generated/parameter-matrix.js +16 -5
- package/src/lib/generated/validation-rules.js +7 -3
- package/src/lib/path-prover-brain.js +58 -1
- package/src/lib/prompts/infrastructure-prompts.js +2 -2
- package/src/lib/prompts/model-prompts.js +6 -0
- package/src/lib/secrets-prompt-runner.js +4 -0
- package/src/lib/template-manager.js +1 -1
- package/src/lib/template-variable-resolver.js +62 -0
- package/templates/do/adapter +5 -0
- package/templates/do/build +5 -0
- package/templates/do/clean.d/async-inference.ejs +5 -0
- package/templates/do/clean.d/batch-transform.ejs +5 -0
- package/templates/do/clean.d/hyperpod-eks.ejs +5 -0
- package/templates/do/clean.d/managed-inference.ejs +5 -0
- package/templates/do/config +12 -45
- package/templates/do/deploy.d/async-inference.ejs +30 -3
- package/templates/do/deploy.d/batch-transform.ejs +29 -3
- package/templates/do/deploy.d/hyperpod-eks.ejs +4 -0
- package/templates/do/deploy.d/managed-inference.ejs +24 -3
- package/templates/do/lib/endpoint-config.sh +1 -1
- package/templates/do/lib/profile.sh +44 -0
- package/templates/do/push +5 -0
- package/templates/do/register +5 -0
- package/templates/do/stage +567 -0
- package/templates/do/submit +7 -0
- package/templates/do/test +1 -0
- package/templates/do/tune +4 -0
|
@@ -252,7 +252,7 @@ const infraInstancePrompts = [
|
|
|
252
252
|
if (!input || input.trim() === '') {
|
|
253
253
|
return 'At least one instance type is required';
|
|
254
254
|
}
|
|
255
|
-
const instancePattern = /^ml\.[a-z0-9]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
|
|
255
|
+
const instancePattern = /^ml\.[a-z0-9-]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
|
|
256
256
|
const instances = input.split(',').map(s => s.trim()).filter(s => s.length > 0);
|
|
257
257
|
if (instances.length === 0) {
|
|
258
258
|
return 'At least one instance type is required';
|
|
@@ -384,7 +384,7 @@ const infraInstancePrompts = [
|
|
|
384
384
|
if (!input || input.trim() === '') {
|
|
385
385
|
return 'Instance type is required';
|
|
386
386
|
}
|
|
387
|
-
const instancePattern = /^ml\.[a-z0-9]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
|
|
387
|
+
const instancePattern = /^ml\.[a-z0-9-]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
|
|
388
388
|
if (!instancePattern.test(input.trim())) {
|
|
389
389
|
return 'Invalid instance type format. Expected format: ml.{family}.{size} (e.g., ml.m5.large, ml.g4dn.xlarge)';
|
|
390
390
|
}
|
|
@@ -459,6 +459,12 @@ const hfTokenPrompts = [
|
|
|
459
459
|
return false;
|
|
460
460
|
}
|
|
461
461
|
|
|
462
|
+
// Skip HF token when model name is an S3 URI (no HF download needed)
|
|
463
|
+
const modelName = answers.customModelName || answers.modelName;
|
|
464
|
+
if (modelName && modelName.startsWith('s3://')) {
|
|
465
|
+
return false;
|
|
466
|
+
}
|
|
467
|
+
|
|
462
468
|
// Display security warning before prompting
|
|
463
469
|
console.log('\nš HuggingFace Authentication');
|
|
464
470
|
console.log(' Many models (e.g. Llama, Mistral) are gated and require a token.');
|
|
@@ -70,6 +70,10 @@ export default class SecretsPromptRunner {
|
|
|
70
70
|
const modelSource = answers.modelSource;
|
|
71
71
|
if (modelSource && modelSource !== 'huggingface') return false;
|
|
72
72
|
|
|
73
|
+
// Skip HF token when model name is an S3 URI (no HF download needed)
|
|
74
|
+
const modelName = answers.customModelName || answers.modelName;
|
|
75
|
+
if (modelName && modelName.startsWith('s3://')) return false;
|
|
76
|
+
|
|
73
77
|
return true;
|
|
74
78
|
}
|
|
75
79
|
|
|
@@ -146,7 +146,7 @@ export default class TemplateManager {
|
|
|
146
146
|
|
|
147
147
|
// Validate instance type format (ml.*.*) - only for realtime-inference
|
|
148
148
|
if (this.answers.instanceType && this.answers.instanceType !== 'custom') {
|
|
149
|
-
const instancePattern = /^ml\.[a-z0-9]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
|
|
149
|
+
const instancePattern = /^ml\.[a-z0-9-]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
|
|
150
150
|
if (!instancePattern.test(this.answers.instanceType)) {
|
|
151
151
|
throw new Error(`⚠️ Invalid instance type format: ${this.answers.instanceType}. Expected format: ml.{family}.{size} (e.g., ml.m5.large, ml.g5.xlarge)`);
|
|
152
152
|
}
|
|
@@ -383,6 +383,68 @@ export async function _ensureTemplateVariables(answers, registryConfigManager =
|
|
|
383
383
|
}
|
|
384
384
|
}
|
|
385
385
|
|
|
386
|
+
// Auto-resolve tensor parallel degree from instance catalog GPU count.
|
|
387
|
+
// Only applies when:
|
|
388
|
+
// 1. The engine supports tensor parallelism (vLLM, SGLang, TensorRT-LLM, LMI)
|
|
389
|
+
// 2. The instance has multiple GPUs (gpus > 1)
|
|
390
|
+
// 3. The user has NOT explicitly set the TP env var via --server-env or --model-env
|
|
391
|
+
// This ensures multi-GPU instances default to full TP utilization without requiring
|
|
392
|
+
// the user to manually specify TENSOR_PARALLEL_SIZE.
|
|
393
|
+
// Requirements: FTP-1 (extension) — task 6.2
|
|
394
|
+
const _TP_ENGINE_MAP = {
|
|
395
|
+
'vllm': 'VLLM_TENSOR_PARALLEL_SIZE',
|
|
396
|
+
'vllm-omni': 'VLLM_OMNI_TENSOR_PARALLEL_SIZE',
|
|
397
|
+
'sglang': 'SGLANG_TENSOR_PARALLEL_SIZE',
|
|
398
|
+
'tensorrt-llm': 'TRTLLM_TENSOR_PARALLEL_SIZE',
|
|
399
|
+
'lmi': 'OPTION_TENSOR_PARALLEL_DEGREE'
|
|
400
|
+
};
|
|
401
|
+
|
|
402
|
+
const tpEngine = answers.backend || answers.modelServer;
|
|
403
|
+
const tpEnvKey = tpEngine ? _TP_ENGINE_MAP[tpEngine] : null;
|
|
404
|
+
|
|
405
|
+
if (tpEnvKey && answers.instanceType) {
|
|
406
|
+
// Check if user explicitly set the TP value via --server-env (un-prefixed key)
|
|
407
|
+
const userServerEnvVars = answers.serverEnvVars || {};
|
|
408
|
+
const userExplicitlySetTP = (
|
|
409
|
+
userServerEnvVars['TENSOR_PARALLEL_SIZE'] !== undefined ||
|
|
410
|
+
userServerEnvVars['TENSOR_PARALLEL_DEGREE'] !== undefined ||
|
|
411
|
+
userServerEnvVars[tpEnvKey] !== undefined
|
|
412
|
+
);
|
|
413
|
+
|
|
414
|
+
if (!userExplicitlySetTP) {
|
|
415
|
+
// Look up GPU count from instance catalog
|
|
416
|
+
let instanceGpuCount = null;
|
|
417
|
+
if (answers.gpuCount) {
|
|
418
|
+
instanceGpuCount = answers.gpuCount;
|
|
419
|
+
} else if (answers.icGpuCount) {
|
|
420
|
+
instanceGpuCount = answers.icGpuCount;
|
|
421
|
+
} else {
|
|
422
|
+
try {
|
|
423
|
+
const catalogPath = path.resolve(__dirname, '..', '..', 'servers', 'lib', 'catalogs', 'instances.json');
|
|
424
|
+
const catalogData = JSON.parse(fs.readFileSync(catalogPath, 'utf-8'));
|
|
425
|
+
const instanceInfo = catalogData?.catalog?.[answers.instanceType];
|
|
426
|
+
if (instanceInfo?.gpus && instanceInfo.gpus > 0) {
|
|
427
|
+
instanceGpuCount = instanceInfo.gpus;
|
|
428
|
+
}
|
|
429
|
+
} catch {
|
|
430
|
+
// Silently continue
|
|
431
|
+
}
|
|
432
|
+
}
|
|
433
|
+
|
|
434
|
+
// Auto-set TP to GPU count when instance has multiple GPUs
|
|
435
|
+
if (instanceGpuCount && instanceGpuCount > 1) {
|
|
436
|
+
if (!answers.envVars) {
|
|
437
|
+
answers.envVars = {};
|
|
438
|
+
}
|
|
439
|
+
answers.envVars[tpEnvKey] = String(instanceGpuCount);
|
|
440
|
+
answers.tensorParallelSize = instanceGpuCount;
|
|
441
|
+
answers._tpAutoResolved = true;
|
|
442
|
+
answers._tpAutoResolvedFrom = answers.instanceType;
|
|
443
|
+
console.log(` ℹ️ TP degree: ${instanceGpuCount} (auto-detected from ${answers.instanceType})`);
|
|
444
|
+
}
|
|
445
|
+
}
|
|
446
|
+
}
|
|
447
|
+
|
|
386
448
|
// Determine tune support based on model presence in the tune catalog.
|
|
387
449
|
// Used by the do/config template to write TUNE_SUPPORTED=true|false.
|
|
388
450
|
if (answers.tuneSupported === undefined) {
|
package/templates/do/adapter
CHANGED
|
@@ -18,6 +18,11 @@ set -o pipefail
|
|
|
18
18
|
# āā Source project configuration āāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā
|
|
19
19
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
20
20
|
source "${SCRIPT_DIR}/config"
|
|
21
|
+
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
22
|
+
|
|
23
|
+
# āā Profile-resolved variables (env var > profile > default) āāāāāāāāāāāāāāāāāā
|
|
24
|
+
ADAPTER_S3_BUCKET="${ADAPTER_S3_BUCKET:-mlcc-adapters-${_PROFILE[accountId]:-unknown}-${_PROFILE[awsRegion]:-us-east-1}}"
|
|
25
|
+
|
|
21
26
|
source "${SCRIPT_DIR}/lib/wait.sh"
|
|
22
27
|
|
|
23
28
|
# āā Usage āāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā
|
package/templates/do/build
CHANGED
|
@@ -9,6 +9,11 @@ set -o pipefail
|
|
|
9
9
|
# Source configuration
|
|
10
10
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
11
11
|
source "${SCRIPT_DIR}/config"
|
|
12
|
+
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
13
|
+
|
|
14
|
+
# āā Profile-resolved variables (env var > profile > default) āāāāāāāāāāāāāāāāāā
|
|
15
|
+
ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
|
|
16
|
+
export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
|
|
12
17
|
|
|
13
18
|
echo "š Building Docker image for ${PROJECT_NAME}"
|
|
14
19
|
echo " Deployment config: ${DEPLOYMENT_CONFIG}"
|
|
@@ -9,6 +9,11 @@ set -o pipefail
|
|
|
9
9
|
# Source configuration
|
|
10
10
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
11
11
|
source "${SCRIPT_DIR}/config"
|
|
12
|
+
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
13
|
+
|
|
14
|
+
# āā Profile-resolved variables (env var > profile > default) āāāāāāāāāāāāāāāāāā
|
|
15
|
+
ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
|
|
16
|
+
export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
|
|
12
17
|
|
|
13
18
|
# Parse arguments
|
|
14
19
|
CLEANUP_TARGET=""
|
|
@@ -9,6 +9,11 @@ set -o pipefail
|
|
|
9
9
|
# Source configuration
|
|
10
10
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
11
11
|
source "${SCRIPT_DIR}/config"
|
|
12
|
+
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
13
|
+
|
|
14
|
+
# āā Profile-resolved variables (env var > profile > default) āāāāāāāāāāāāāāāāāā
|
|
15
|
+
ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
|
|
16
|
+
export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
|
|
12
17
|
|
|
13
18
|
# Parse arguments
|
|
14
19
|
CLEANUP_TARGET=""
|
|
@@ -9,6 +9,11 @@ set -o pipefail
|
|
|
9
9
|
# Source configuration
|
|
10
10
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
11
11
|
source "${SCRIPT_DIR}/config"
|
|
12
|
+
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
13
|
+
|
|
14
|
+
# āā Profile-resolved variables (env var > profile > default) āāāāāāāāāāāāāāāāāā
|
|
15
|
+
ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
|
|
16
|
+
export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
|
|
12
17
|
|
|
13
18
|
# Parse arguments
|
|
14
19
|
CLEANUP_TARGET=""
|
|
@@ -9,6 +9,11 @@ set -o pipefail
|
|
|
9
9
|
# Source configuration
|
|
10
10
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
11
11
|
source "${SCRIPT_DIR}/config"
|
|
12
|
+
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
13
|
+
|
|
14
|
+
# āā Profile-resolved variables (env var > profile > default) āāāāāāāāāāāāāāāāāā
|
|
15
|
+
ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
|
|
16
|
+
export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
|
|
12
17
|
|
|
13
18
|
# Parse arguments
|
|
14
19
|
CLEANUP_TARGET=""
|
package/templates/do/config
CHANGED
|
@@ -13,23 +13,23 @@ export MODEL_SERVER="<%= modelServer %>"
|
|
|
13
13
|
|
|
14
14
|
# AWS configuration
|
|
15
15
|
export AWS_REGION=${AWS_REGION:-<%= awsRegion %>}
|
|
16
|
-
|
|
16
|
+
|
|
17
|
+
# āā Profile-resolved values (from ~/.ml-container-creator/config.json) āāāā
|
|
18
|
+
# ECR_REPOSITORY_NAME, ROLE_ARN, ADAPTER_S3_BUCKET ā see do/lib/profile.sh
|
|
17
19
|
|
|
18
20
|
<% if (typeof enableLora !== 'undefined' && enableLora) { %>
|
|
19
21
|
# LoRA adapter serving
|
|
20
22
|
export ENABLE_LORA=true
|
|
21
|
-
export ADAPTER_S3_BUCKET="mlcc-adapters-$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo 'UNKNOWN')-${AWS_REGION}"
|
|
22
23
|
<% } else if (framework === 'transformers' || framework === 'diffusors') { %>
|
|
23
24
|
# LoRA adapter serving (uncomment to enable)
|
|
24
25
|
# export ENABLE_LORA=true
|
|
25
|
-
# export ADAPTER_S3_BUCKET="mlcc-adapters-$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo 'UNKNOWN')-${AWS_REGION}"
|
|
26
26
|
<% } %>
|
|
27
27
|
|
|
28
28
|
# Build configuration — WHERE the Docker image gets built
|
|
29
29
|
export BUILD_TARGET="<%= buildTarget %>"
|
|
30
30
|
<% if (buildTarget === 'codebuild') { %>
|
|
31
31
|
export CODEBUILD_COMPUTE_TYPE="<%= codebuildComputeType %>"
|
|
32
|
-
|
|
32
|
+
# CODEBUILD_PROJECT_NAME — derived in do/submit at runtime
|
|
33
33
|
<% } %>
|
|
34
34
|
|
|
35
35
|
# Deployment configuration — WHERE the model runs
|
|
@@ -83,26 +83,9 @@ export INFERENCE_AMI_VERSION="<%= inferenceAmiVersion %>"
|
|
|
83
83
|
<% } %>
|
|
84
84
|
|
|
85
85
|
# Async-specific configuration
|
|
86
|
-
#
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
<% if (asyncS3OutputPath) { %>
|
|
90
|
-
export ASYNC_S3_OUTPUT_PATH="<%= asyncS3OutputPath %>"
|
|
91
|
-
<% } else { %>
|
|
92
|
-
export ASYNC_S3_OUTPUT_PATH="s3://mlcc-async-${ACCOUNT_ID}-${AWS_REGION}/${PROJECT_NAME}/output/"
|
|
93
|
-
<% } %>
|
|
94
|
-
|
|
95
|
-
<% if (asyncSnsSuccessTopic) { %>
|
|
96
|
-
export ASYNC_SNS_SUCCESS_TOPIC="<%= asyncSnsSuccessTopic %>"
|
|
97
|
-
<% } else { %>
|
|
98
|
-
export ASYNC_SNS_SUCCESS_TOPIC="arn:aws:sns:${AWS_REGION}:${ACCOUNT_ID}:ml-container-creator-${PROJECT_NAME}-async-success"
|
|
99
|
-
<% } %>
|
|
100
|
-
|
|
101
|
-
<% if (asyncSnsErrorTopic) { %>
|
|
102
|
-
export ASYNC_SNS_ERROR_TOPIC="<%= asyncSnsErrorTopic %>"
|
|
103
|
-
<% } else { %>
|
|
104
|
-
export ASYNC_SNS_ERROR_TOPIC="arn:aws:sns:${AWS_REGION}:${ACCOUNT_ID}:ml-container-creator-${PROJECT_NAME}-async-error"
|
|
105
|
-
<% } %>
|
|
86
|
+
# āā Profile-resolved values (from ~/.ml-container-creator/config.json) āāāā
|
|
87
|
+
# ASYNC_S3_OUTPUT_PATH, ASYNC_SNS_SUCCESS_TOPIC, ASYNC_SNS_ERROR_TOPIC ā see do/lib/profile.sh
|
|
88
|
+
# ACCOUNT_ID ā derived inline in consuming scripts (do/deploy.d/async-inference)
|
|
106
89
|
|
|
107
90
|
<% if (asyncMaxConcurrentInvocations) { %>
|
|
108
91
|
export ASYNC_MAX_CONCURRENT_INVOCATIONS="<%= asyncMaxConcurrentInvocations %>"
|
|
@@ -129,19 +112,10 @@ export FSX_VOLUME_HANDLE="<%= fsxVolumeHandle %>"
|
|
|
129
112
|
# SageMaker Batch Transform configuration
|
|
130
113
|
export INSTANCE_TYPE="<%= instanceType %>"
|
|
131
114
|
|
|
132
|
-
#
|
|
133
|
-
|
|
115
|
+
# āā Profile-resolved values (from ~/.ml-container-creator/config.json) āāāā
|
|
116
|
+
# BATCH_INPUT_PATH, BATCH_OUTPUT_PATH ā see do/lib/profile.sh
|
|
117
|
+
# ACCOUNT_ID ā derived inline in consuming scripts (do/deploy.d/batch-transform)
|
|
134
118
|
|
|
135
|
-
<% if (batchInputPath) { %>
|
|
136
|
-
export BATCH_INPUT_PATH="<%= batchInputPath %>"
|
|
137
|
-
<% } else { %>
|
|
138
|
-
export BATCH_INPUT_PATH="s3://mlcc-batch-${ACCOUNT_ID}-${AWS_REGION}/${PROJECT_NAME}/input/"
|
|
139
|
-
<% } %>
|
|
140
|
-
<% if (batchOutputPath) { %>
|
|
141
|
-
export BATCH_OUTPUT_PATH="<%= batchOutputPath %>"
|
|
142
|
-
<% } else { %>
|
|
143
|
-
export BATCH_OUTPUT_PATH="s3://mlcc-batch-${ACCOUNT_ID}-${AWS_REGION}/${PROJECT_NAME}/output/"
|
|
144
|
-
<% } %>
|
|
145
119
|
export BATCH_INSTANCE_COUNT="<%= batchInstanceCount %>"
|
|
146
120
|
export BATCH_SPLIT_TYPE="<%= batchSplitType %>"
|
|
147
121
|
export BATCH_STRATEGY="<%= batchStrategy %>"
|
|
@@ -256,6 +230,8 @@ export NGC_API_KEY="<%= ngcApiKey %>"
|
|
|
256
230
|
|
|
257
231
|
<% if (deploymentTarget !== 'batch-transform') { %>
|
|
258
232
|
# Managed Model Customization (do/tune)
|
|
233
|
+
# āā Profile-resolved values (from ~/.ml-container-creator/config.json) āāāā
|
|
234
|
+
# TUNE_S3_BUCKET ā see do/lib/profile.sh
|
|
259
235
|
export TUNE_SUPPORTED=<%= (typeof tuneSupported !== 'undefined' && tuneSupported) ? 'true' : 'false' %>
|
|
260
236
|
<% if (typeof tuneSupported !== 'undefined' && tuneSupported) { %>
|
|
261
237
|
<% if (typeof tuneModelId !== 'undefined' && tuneModelId) { %>
|
|
@@ -270,7 +246,6 @@ export TUNE_MODEL_ID="<%= tuneModelId %>"
|
|
|
270
246
|
# export TUNE_MODEL_ID=""
|
|
271
247
|
<% } %>
|
|
272
248
|
<% } %>
|
|
273
|
-
export TUNE_S3_BUCKET="mlcc-tune-$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo 'UNKNOWN')-${AWS_REGION}"
|
|
274
249
|
# MLflow App ARN for experiment tracking (set by bootstrap, or override manually)
|
|
275
250
|
# export MLFLOW_APP_ARN=""
|
|
276
251
|
<% } %>
|
|
@@ -296,13 +271,6 @@ export MODEL_FORMAT="<%= modelFormat %>"
|
|
|
296
271
|
# export MODEL_FORMAT=""
|
|
297
272
|
<% } %>
|
|
298
273
|
|
|
299
|
-
<% if (roleArn) { %>
|
|
300
|
-
export ROLE_ARN="<%= roleArn %>"
|
|
301
|
-
<% } else { %>
|
|
302
|
-
# IAM execution role for SageMaker (uncomment to override bootstrap role)
|
|
303
|
-
# export ROLE_ARN=""
|
|
304
|
-
<% } %>
|
|
305
|
-
|
|
306
274
|
<% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
|
|
307
275
|
# SageMaker AI Benchmarking configuration
|
|
308
276
|
export BENCHMARK_CONCURRENCY="<%= benchmarkConcurrency %>"
|
|
@@ -353,7 +321,6 @@ export BASE_IMAGE=${BASE_IMAGE:-<%= baseImage || '' %>}
|
|
|
353
321
|
<% if ((deploymentTarget === 'realtime-inference' && !(typeof existingEndpointName !== 'undefined' && existingEndpointName)) || deploymentTarget === 'async-inference' || deploymentTarget === 'batch-transform') { %>
|
|
354
322
|
export INSTANCE_TYPE=${INSTANCE_TYPE:-<%= instanceType %>}
|
|
355
323
|
<% } %>
|
|
356
|
-
export ECR_REPOSITORY_NAME=${ECR_REPOSITORY_NAME:-ml-container-creator}
|
|
357
324
|
|
|
358
325
|
# Print configuration summary
|
|
359
326
|
echo "āļø Configuration loaded"
|
|
@@ -38,6 +38,18 @@ done
|
|
|
38
38
|
# Source configuration
|
|
39
39
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
40
40
|
source "${SCRIPT_DIR}/config"
|
|
41
|
+
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
42
|
+
|
|
43
|
+
# āā Profile-resolved variables (env var > profile > default) āāāāāāāāāāāāāāāāāā
|
|
44
|
+
ROLE_ARN="${ROLE_ARN:-${_PROFILE[roleArn]:-}}"
|
|
45
|
+
ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
|
|
46
|
+
export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
|
|
47
|
+
|
|
48
|
+
# Async-specific derived variables
|
|
49
|
+
_ASYNC_BUCKET="${_PROFILE[asyncS3Bucket]:-mlcc-async-${_PROFILE[accountId]:-unknown}-${_PROFILE[awsRegion]:-us-east-1}}"
|
|
50
|
+
ASYNC_S3_OUTPUT_PATH="${ASYNC_S3_OUTPUT_PATH:-s3://${_ASYNC_BUCKET}/${PROJECT_NAME}/output/}"
|
|
51
|
+
ASYNC_SNS_SUCCESS_TOPIC="${ASYNC_SNS_SUCCESS_TOPIC:-arn:aws:sns:${_PROFILE[awsRegion]:-us-east-1}:${_PROFILE[accountId]:-unknown}:ml-container-creator-${PROJECT_NAME}-async-success}"
|
|
52
|
+
ASYNC_SNS_ERROR_TOPIC="${ASYNC_SNS_ERROR_TOPIC:-arn:aws:sns:${_PROFILE[awsRegion]:-us-east-1}:${_PROFILE[accountId]:-unknown}:ml-container-creator-${PROJECT_NAME}-async-error}"
|
|
41
53
|
|
|
42
54
|
echo "š Deploying to AWS"
|
|
43
55
|
echo " Project: ${PROJECT_NAME}"
|
|
@@ -137,16 +149,31 @@ source "${SCRIPT_DIR}/lib/wait.sh"
|
|
|
137
149
|
# Resolve container secrets (HF_TOKEN, NGC_API_KEY)
|
|
138
150
|
resolve_secrets
|
|
139
151
|
|
|
152
|
+
<% if (typeof serverEnvVars !== 'undefined' && serverEnvVars && Object.keys(serverEnvVars).length > 0) { %>
|
|
153
|
+
# ============================================================
|
|
154
|
+
# Inject server environment variables into container Environment
|
|
155
|
+
# ============================================================
|
|
156
|
+
<% Object.keys(serverEnvVars).forEach(function(key) { %>
|
|
157
|
+
if [ -n "${<%= key %>:-}" ]; then
|
|
158
|
+
if [ -n "${CONTAINER_ENV_JSON}" ]; then
|
|
159
|
+
CONTAINER_ENV_JSON="${CONTAINER_ENV_JSON},\"<%= key %>\":\"${<%= key %>}\""
|
|
160
|
+
else
|
|
161
|
+
CONTAINER_ENV_JSON="\"<%= key %>\":\"${<%= key %>}\""
|
|
162
|
+
fi
|
|
163
|
+
fi
|
|
164
|
+
<% }); %>
|
|
165
|
+
<% } %>
|
|
166
|
+
|
|
140
167
|
# Validate execution role ARN
|
|
141
168
|
if [ -z "${ROLE_ARN:-}" ]; then
|
|
142
|
-
echo "ā
|
|
169
|
+
echo "ā ROLE_ARN is not set."
|
|
170
|
+
echo " Run 'ml-container-creator bootstrap' to configure your profile,"
|
|
171
|
+
echo " or set ROLE_ARN as an environment variable."
|
|
143
172
|
echo ""
|
|
144
173
|
echo "Usage:"
|
|
145
174
|
echo " export ROLE_ARN=arn:aws:iam::ACCOUNT_ID:role/YOUR_ROLE"
|
|
146
175
|
echo " ./do/deploy"
|
|
147
176
|
echo ""
|
|
148
|
-
echo "Or set ROLE_ARN in do/config"
|
|
149
|
-
echo ""
|
|
150
177
|
echo "The execution role must have permissions for:"
|
|
151
178
|
echo " • SageMaker model and endpoint management"
|
|
152
179
|
echo " • ECR image access"
|
|
@@ -38,6 +38,17 @@ done
|
|
|
38
38
|
# Source configuration
|
|
39
39
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
40
40
|
source "${SCRIPT_DIR}/config"
|
|
41
|
+
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
42
|
+
|
|
43
|
+
# āā Profile-resolved variables (env var > profile > default) āāāāāāāāāāāāāāāāāā
|
|
44
|
+
ROLE_ARN="${ROLE_ARN:-${_PROFILE[roleArn]:-}}"
|
|
45
|
+
ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
|
|
46
|
+
export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
|
|
47
|
+
|
|
48
|
+
# Batch-specific derived variables
|
|
49
|
+
_BATCH_BUCKET="${_PROFILE[batchS3Bucket]:-mlcc-batch-${_PROFILE[accountId]:-unknown}-${_PROFILE[awsRegion]:-us-east-1}}"
|
|
50
|
+
BATCH_INPUT_PATH="${BATCH_INPUT_PATH:-s3://${_BATCH_BUCKET}/${PROJECT_NAME}/input/}"
|
|
51
|
+
BATCH_OUTPUT_PATH="${BATCH_OUTPUT_PATH:-s3://${_BATCH_BUCKET}/${PROJECT_NAME}/output/}"
|
|
41
52
|
|
|
42
53
|
echo "š Deploying to AWS"
|
|
43
54
|
echo " Project: ${PROJECT_NAME}"
|
|
@@ -135,16 +146,31 @@ source "${SCRIPT_DIR}/lib/wait.sh"
|
|
|
135
146
|
# Resolve container secrets (HF_TOKEN, NGC_API_KEY)
|
|
136
147
|
resolve_secrets
|
|
137
148
|
|
|
149
|
+
<% if (typeof serverEnvVars !== 'undefined' && serverEnvVars && Object.keys(serverEnvVars).length > 0) { %>
|
|
150
|
+
# ============================================================
|
|
151
|
+
# Inject server environment variables into container Environment
|
|
152
|
+
# ============================================================
|
|
153
|
+
<% Object.keys(serverEnvVars).forEach(function(key) { %>
|
|
154
|
+
if [ -n "${<%= key %>:-}" ]; then
|
|
155
|
+
if [ -n "${CONTAINER_ENV_JSON}" ]; then
|
|
156
|
+
CONTAINER_ENV_JSON="${CONTAINER_ENV_JSON},\"<%= key %>\":\"${<%= key %>}\""
|
|
157
|
+
else
|
|
158
|
+
CONTAINER_ENV_JSON="\"<%= key %>\":\"${<%= key %>}\""
|
|
159
|
+
fi
|
|
160
|
+
fi
|
|
161
|
+
<% }); %>
|
|
162
|
+
<% } %>
|
|
163
|
+
|
|
138
164
|
# Validate execution role ARN
|
|
139
165
|
if [ -z "${ROLE_ARN:-}" ]; then
|
|
140
|
-
echo "ā
|
|
166
|
+
echo "ā ROLE_ARN is not set."
|
|
167
|
+
echo " Run 'ml-container-creator bootstrap' to configure your profile,"
|
|
168
|
+
echo " or set ROLE_ARN as an environment variable."
|
|
141
169
|
echo ""
|
|
142
170
|
echo "Usage:"
|
|
143
171
|
echo " export ROLE_ARN=arn:aws:iam::ACCOUNT_ID:role/YOUR_ROLE"
|
|
144
172
|
echo " ./do/deploy"
|
|
145
173
|
echo ""
|
|
146
|
-
echo "Or set ROLE_ARN in do/config"
|
|
147
|
-
echo ""
|
|
148
174
|
echo "The execution role must have permissions for:"
|
|
149
175
|
echo " • SageMaker model and transform job management"
|
|
150
176
|
echo " • ECR image access"
|
|
@@ -38,6 +38,10 @@ done
|
|
|
38
38
|
# Source configuration
|
|
39
39
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
40
40
|
source "${SCRIPT_DIR}/config"
|
|
41
|
+
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
42
|
+
|
|
43
|
+
# āā Profile-resolved variables (env var > profile > default) āāāāāāāāāāāāāāāāāā
|
|
44
|
+
export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
|
|
41
45
|
|
|
42
46
|
echo "š Deploying to AWS"
|
|
43
47
|
echo " Project: ${PROJECT_NAME}"
|
|
@@ -211,6 +211,12 @@ _ci_handle_existing_endpoint() {
|
|
|
211
211
|
# Source configuration
|
|
212
212
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
213
213
|
source "${SCRIPT_DIR}/config"
|
|
214
|
+
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
215
|
+
|
|
216
|
+
# āā Profile-resolved variables (env var > profile > default) āāāāāāāāāāāāāāāāāā
|
|
217
|
+
ROLE_ARN="${ROLE_ARN:-${_PROFILE[roleArn]:-}}"
|
|
218
|
+
ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
|
|
219
|
+
export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
|
|
214
220
|
|
|
215
221
|
echo "š Deploying to AWS"
|
|
216
222
|
echo " Project: ${PROJECT_NAME}"
|
|
@@ -307,14 +313,14 @@ source "${SCRIPT_DIR}/lib/inference-component.sh"
|
|
|
307
313
|
|
|
308
314
|
# Validate execution role ARN
|
|
309
315
|
if [ -z "${ROLE_ARN:-}" ]; then
|
|
310
|
-
echo "ā
|
|
316
|
+
echo "ā ROLE_ARN is not set."
|
|
317
|
+
echo " Run 'ml-container-creator bootstrap' to configure your profile,"
|
|
318
|
+
echo " or set ROLE_ARN as an environment variable."
|
|
311
319
|
echo ""
|
|
312
320
|
echo "Usage:"
|
|
313
321
|
echo " export ROLE_ARN=arn:aws:iam::ACCOUNT_ID:role/YOUR_ROLE"
|
|
314
322
|
echo " ./do/deploy"
|
|
315
323
|
echo ""
|
|
316
|
-
echo "Or set ROLE_ARN in do/config"
|
|
317
|
-
echo ""
|
|
318
324
|
echo "The execution role must have permissions for:"
|
|
319
325
|
echo " ⢠SageMaker endpoint and inference component management"
|
|
320
326
|
echo " ⢠ECR image access"
|
|
@@ -350,6 +356,21 @@ fi
|
|
|
350
356
|
# Resolve container secrets (HF_TOKEN, NGC_API_KEY)
|
|
351
357
|
resolve_secrets
|
|
352
358
|
|
|
359
|
+
<% if (typeof serverEnvVars !== 'undefined' && serverEnvVars && Object.keys(serverEnvVars).length > 0) { %>
|
|
360
|
+
# ============================================================
|
|
361
|
+
# Inject server environment variables into container Environment
|
|
362
|
+
# ============================================================
|
|
363
|
+
<% Object.keys(serverEnvVars).forEach(function(key) { %>
|
|
364
|
+
if [ -n "${<%= key %>:-}" ]; then
|
|
365
|
+
if [ -n "${CONTAINER_ENV_JSON}" ]; then
|
|
366
|
+
CONTAINER_ENV_JSON="${CONTAINER_ENV_JSON},\"<%= key %>\":\"${<%= key %>}\""
|
|
367
|
+
else
|
|
368
|
+
CONTAINER_ENV_JSON="\"<%= key %>\":\"${<%= key %>}\""
|
|
369
|
+
fi
|
|
370
|
+
fi
|
|
371
|
+
<% }); %>
|
|
372
|
+
<% } %>
|
|
373
|
+
|
|
353
374
|
# ============================================================
|
|
354
375
|
# CI-Mode: Idempotent deployment check (runs before normal idempotency)
|
|
355
376
|
# ============================================================
|
|
@@ -165,7 +165,7 @@ create_endpoint_config() {
|
|
|
165
165
|
# Optional: capacity reservation
|
|
166
166
|
if [ -n "${CAPACITY_RESERVATION_ARN:-}" ]; then
|
|
167
167
|
variant_json="${variant_json},\"CapacityReservationConfig\":{\"CapacityReservationPreference\":\"capacity-reservations-only\",\"MlReservationArn\":\"${CAPACITY_RESERVATION_ARN}\"}"
|
|
168
|
-
echo "
|
|
168
|
+
echo " ā Capacity reservation: ${CAPACITY_RESERVATION_ARN}"
|
|
169
169
|
fi
|
|
170
170
|
|
|
171
171
|
variant_json="${variant_json}}]"
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Profile loader — reads active bootstrap profile into _PROFILE[] associative array.
|
|
3
|
+
# Source this file after do/config. Values provide defaults; explicit env vars take precedence.
|
|
4
|
+
#
|
|
5
|
+
# Requires bash 4+ for associative array support.
|
|
6
|
+
# macOS ships with bash 3.2 — install bash 4+ via Homebrew: brew install bash
|
|
7
|
+
#
|
|
8
|
+
# Expected keys in _PROFILE:
|
|
9
|
+
# awsRegion, accountId, awsProfile, roleArn, ecrRepositoryName,
|
|
10
|
+
# benchmarkS3Bucket, ciBenchmarkResultsBucket, asyncS3Bucket, batchS3Bucket,
|
|
11
|
+
# ciTableName, ciInfraProvisioned
|
|
12
|
+
|
|
13
|
+
# Temporarily disable unbound variable checking for profile loading
|
|
14
|
+
# (keys may not exist in the profile config, and declare -A behavior
|
|
15
|
+
# varies across bash versions with set -u)
|
|
16
|
+
set +u 2>/dev/null || true
|
|
17
|
+
|
|
18
|
+
declare -A _PROFILE 2>/dev/null || true
|
|
19
|
+
if command -v python3 &>/dev/null; then
|
|
20
|
+
_PROFILE_RAW=$(python3 -c "
|
|
21
|
+
import json, os
|
|
22
|
+
try:
|
|
23
|
+
with open(os.path.expanduser('~/.ml-container-creator/config.json')) as f:
|
|
24
|
+
c = json.load(f)
|
|
25
|
+
p = c['profiles'][c['activeProfile']]
|
|
26
|
+
# Output as KEY=VALUE lines (simple, no JSON parsing in bash)
|
|
27
|
+
for k, v in p.items():
|
|
28
|
+
if isinstance(v, (str, int, float, bool)):
|
|
29
|
+
print(f'{k}={v}')
|
|
30
|
+
except:
|
|
31
|
+
pass
|
|
32
|
+
" 2>/dev/null) || _PROFILE_RAW=""
|
|
33
|
+
|
|
34
|
+
if [ -n "${_PROFILE_RAW}" ]; then
|
|
35
|
+
while IFS='=' read -r key value; do
|
|
36
|
+
[ -n "${key}" ] && _PROFILE["${key}"]="${value}"
|
|
37
|
+
done <<< "${_PROFILE_RAW}"
|
|
38
|
+
fi
|
|
39
|
+
fi
|
|
40
|
+
|
|
41
|
+
# NOTE: set -u is NOT re-enabled here. The caller is responsible for managing
|
|
42
|
+
# their own shell options. Re-enabling set -u would cause "unbound variable"
|
|
43
|
+
# errors when accessing _PROFILE keys on bash versions where empty associative
|
|
44
|
+
# arrays are treated as unset (bash 5.x on some platforms).
|
package/templates/do/push
CHANGED
|
@@ -9,6 +9,11 @@ set -o pipefail
|
|
|
9
9
|
# Source configuration
|
|
10
10
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
11
11
|
source "${SCRIPT_DIR}/config"
|
|
12
|
+
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
13
|
+
|
|
14
|
+
# āā Profile-resolved variables (env var > profile > default) āāāāāāāāāāāāāāāāāā
|
|
15
|
+
ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
|
|
16
|
+
export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
|
|
12
17
|
|
|
13
18
|
echo "š Pushing Docker image to Amazon ECR"
|
|
14
19
|
echo " Project: ${PROJECT_NAME}"
|
package/templates/do/register
CHANGED
|
@@ -9,6 +9,11 @@ set -o pipefail
|
|
|
9
9
|
# Source configuration
|
|
10
10
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
11
11
|
source "${SCRIPT_DIR}/config"
|
|
12
|
+
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
13
|
+
|
|
14
|
+
# āā Profile-resolved variables (env var > profile > default) āāāāāāāāāāāāāāāāāā
|
|
15
|
+
ROLE_ARN="${ROLE_ARN:-${_PROFILE[roleArn]:-}}"
|
|
16
|
+
ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
|
|
12
17
|
|
|
13
18
|
# ============================================================
|
|
14
19
|
# Register deployment to the deployment registry
|