@aws/ml-container-creator 0.10.3 → 0.13.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/parameter-schema-v2.json +28 -1
- package/infra/ci-harness/lib/ci-harness-stack.ts +50 -36
- package/package.json +14 -5
- package/servers/instance-sizer/index.js +30 -17
- package/servers/instance-sizer/lib/instance-ranker.js +44 -0
- package/servers/lib/catalogs/instances.json +27 -0
- package/src/app.js +22 -1
- package/src/lib/bootstrap-command-handler.js +32 -3
- package/src/lib/config-validator.js +1 -1
- package/src/lib/generated/cli-options.js +7 -2
- package/src/lib/generated/parameter-matrix.js +16 -5
- package/src/lib/generated/validation-rules.js +7 -3
- package/src/lib/path-prover-brain.js +58 -1
- package/src/lib/prompts/infrastructure-prompts.js +2 -2
- package/src/lib/prompts/model-prompts.js +6 -0
- package/src/lib/prove-pipeline-executor.js +294 -0
- package/src/lib/secrets-prompt-runner.js +4 -0
- package/src/lib/template-manager.js +1 -1
- package/src/lib/template-variable-resolver.js +62 -0
- package/templates/do/README.md +37 -0
- package/templates/do/adapter +8 -0
- package/templates/do/build +8 -0
- package/templates/do/clean.d/async-inference.ejs +8 -0
- package/templates/do/clean.d/batch-transform.ejs +8 -0
- package/templates/do/clean.d/hyperpod-eks.ejs +8 -0
- package/templates/do/clean.d/managed-inference.ejs +8 -0
- package/templates/do/config +12 -45
- package/templates/do/deploy.d/async-inference.ejs +33 -3
- package/templates/do/deploy.d/batch-transform.ejs +32 -3
- package/templates/do/deploy.d/hyperpod-eks.ejs +7 -0
- package/templates/do/deploy.d/managed-inference.ejs +27 -3
- package/templates/do/lib/endpoint-config.sh +1 -1
- package/templates/do/lib/profile.sh +44 -0
- package/templates/do/lib/staged-assets.sh +217 -0
- package/templates/do/push +8 -0
- package/templates/do/register +8 -0
- package/templates/do/stage +569 -0
- package/templates/do/submit +10 -0
- package/templates/do/test +1 -0
- package/templates/do/tune +7 -0
package/templates/do/config
CHANGED
|
@@ -13,23 +13,23 @@ export MODEL_SERVER="<%= modelServer %>"
|
|
|
13
13
|
|
|
14
14
|
# AWS configuration
|
|
15
15
|
export AWS_REGION=${AWS_REGION:-<%= awsRegion %>}
|
|
16
|
-
|
|
16
|
+
|
|
17
|
+
# ── Profile-resolved values (from ~/.ml-container-creator/config.json) ────
|
|
18
|
+
# ECR_REPOSITORY_NAME, ROLE_ARN, ADAPTER_S3_BUCKET — see do/lib/profile.sh
|
|
17
19
|
|
|
18
20
|
<% if (typeof enableLora !== 'undefined' && enableLora) { %>
|
|
19
21
|
# LoRA adapter serving
|
|
20
22
|
export ENABLE_LORA=true
|
|
21
|
-
export ADAPTER_S3_BUCKET="mlcc-adapters-$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo 'UNKNOWN')-${AWS_REGION}"
|
|
22
23
|
<% } else if (framework === 'transformers' || framework === 'diffusors') { %>
|
|
23
24
|
# LoRA adapter serving (uncomment to enable)
|
|
24
25
|
# export ENABLE_LORA=true
|
|
25
|
-
# export ADAPTER_S3_BUCKET="mlcc-adapters-$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo 'UNKNOWN')-${AWS_REGION}"
|
|
26
26
|
<% } %>
|
|
27
27
|
|
|
28
28
|
# Build configuration — WHERE the Docker image gets built
|
|
29
29
|
export BUILD_TARGET="<%= buildTarget %>"
|
|
30
30
|
<% if (buildTarget === 'codebuild') { %>
|
|
31
31
|
export CODEBUILD_COMPUTE_TYPE="<%= codebuildComputeType %>"
|
|
32
|
-
|
|
32
|
+
# CODEBUILD_PROJECT_NAME — derived in do/submit at runtime
|
|
33
33
|
<% } %>
|
|
34
34
|
|
|
35
35
|
# Deployment configuration — WHERE the model runs
|
|
@@ -83,26 +83,9 @@ export INFERENCE_AMI_VERSION="<%= inferenceAmiVersion %>"
|
|
|
83
83
|
<% } %>
|
|
84
84
|
|
|
85
85
|
# Async-specific configuration
|
|
86
|
-
#
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
<% if (asyncS3OutputPath) { %>
|
|
90
|
-
export ASYNC_S3_OUTPUT_PATH="<%= asyncS3OutputPath %>"
|
|
91
|
-
<% } else { %>
|
|
92
|
-
export ASYNC_S3_OUTPUT_PATH="s3://mlcc-async-${ACCOUNT_ID}-${AWS_REGION}/${PROJECT_NAME}/output/"
|
|
93
|
-
<% } %>
|
|
94
|
-
|
|
95
|
-
<% if (asyncSnsSuccessTopic) { %>
|
|
96
|
-
export ASYNC_SNS_SUCCESS_TOPIC="<%= asyncSnsSuccessTopic %>"
|
|
97
|
-
<% } else { %>
|
|
98
|
-
export ASYNC_SNS_SUCCESS_TOPIC="arn:aws:sns:${AWS_REGION}:${ACCOUNT_ID}:ml-container-creator-${PROJECT_NAME}-async-success"
|
|
99
|
-
<% } %>
|
|
100
|
-
|
|
101
|
-
<% if (asyncSnsErrorTopic) { %>
|
|
102
|
-
export ASYNC_SNS_ERROR_TOPIC="<%= asyncSnsErrorTopic %>"
|
|
103
|
-
<% } else { %>
|
|
104
|
-
export ASYNC_SNS_ERROR_TOPIC="arn:aws:sns:${AWS_REGION}:${ACCOUNT_ID}:ml-container-creator-${PROJECT_NAME}-async-error"
|
|
105
|
-
<% } %>
|
|
86
|
+
# ── Profile-resolved values (from ~/.ml-container-creator/config.json) ────
|
|
87
|
+
# ASYNC_S3_OUTPUT_PATH, ASYNC_SNS_SUCCESS_TOPIC, ASYNC_SNS_ERROR_TOPIC — resolved at deploy time in do/deploy.d/async-inference from the profile loaded by do/lib/profile.sh
|
|
88
|
+
# ACCOUNT_ID — derived inline in consuming scripts (do/deploy.d/async-inference)
|
|
106
89
|
|
|
107
90
|
<% if (asyncMaxConcurrentInvocations) { %>
|
|
108
91
|
export ASYNC_MAX_CONCURRENT_INVOCATIONS="<%= asyncMaxConcurrentInvocations %>"
|
|
@@ -129,19 +112,10 @@ export FSX_VOLUME_HANDLE="<%= fsxVolumeHandle %>"
|
|
|
129
112
|
# SageMaker Batch Transform configuration
|
|
130
113
|
export INSTANCE_TYPE="<%= instanceType %>"
|
|
131
114
|
|
|
132
|
-
#
|
|
133
|
-
|
|
115
|
+
# ── Profile-resolved values (from ~/.ml-container-creator/config.json) ────
|
|
116
|
+
# BATCH_INPUT_PATH, BATCH_OUTPUT_PATH — resolved at deploy time in do/deploy.d/batch-transform from the profile loaded by do/lib/profile.sh
|
|
117
|
+
# ACCOUNT_ID — derived inline in consuming scripts (do/deploy.d/batch-transform)
|
|
134
118
|
|
|
135
|
-
<% if (batchInputPath) { %>
|
|
136
|
-
export BATCH_INPUT_PATH="<%= batchInputPath %>"
|
|
137
|
-
<% } else { %>
|
|
138
|
-
export BATCH_INPUT_PATH="s3://mlcc-batch-${ACCOUNT_ID}-${AWS_REGION}/${PROJECT_NAME}/input/"
|
|
139
|
-
<% } %>
|
|
140
|
-
<% if (batchOutputPath) { %>
|
|
141
|
-
export BATCH_OUTPUT_PATH="<%= batchOutputPath %>"
|
|
142
|
-
<% } else { %>
|
|
143
|
-
export BATCH_OUTPUT_PATH="s3://mlcc-batch-${ACCOUNT_ID}-${AWS_REGION}/${PROJECT_NAME}/output/"
|
|
144
|
-
<% } %>
|
|
145
119
|
export BATCH_INSTANCE_COUNT="<%= batchInstanceCount %>"
|
|
146
120
|
export BATCH_SPLIT_TYPE="<%= batchSplitType %>"
|
|
147
121
|
export BATCH_STRATEGY="<%= batchStrategy %>"
|
|
@@ -256,6 +230,8 @@ export NGC_API_KEY="<%= ngcApiKey %>"
|
|
|
256
230
|
|
|
257
231
|
<% if (deploymentTarget !== 'batch-transform') { %>
|
|
258
232
|
# Managed Model Customization (do/tune)
|
|
233
|
+
# ── Profile-resolved values (from ~/.ml-container-creator/config.json) ────
|
|
234
|
+
# TUNE_S3_BUCKET — see do/lib/profile.sh
|
|
259
235
|
export TUNE_SUPPORTED=<%= (typeof tuneSupported !== 'undefined' && tuneSupported) ? 'true' : 'false' %>
|
|
260
236
|
<% if (typeof tuneSupported !== 'undefined' && tuneSupported) { %>
|
|
261
237
|
<% if (typeof tuneModelId !== 'undefined' && tuneModelId) { %>
|
|
@@ -270,7 +246,6 @@ export TUNE_MODEL_ID="<%= tuneModelId %>"
|
|
|
270
246
|
# export TUNE_MODEL_ID=""
|
|
271
247
|
<% } %>
|
|
272
248
|
<% } %>
|
|
273
|
-
export TUNE_S3_BUCKET="mlcc-tune-$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo 'UNKNOWN')-${AWS_REGION}"
|
|
274
249
|
# MLflow App ARN for experiment tracking (set by bootstrap, or override manually)
|
|
275
250
|
# export MLFLOW_APP_ARN=""
|
|
276
251
|
<% } %>
|
|
@@ -296,13 +271,6 @@ export MODEL_FORMAT="<%= modelFormat %>"
|
|
|
296
271
|
# export MODEL_FORMAT=""
|
|
297
272
|
<% } %>
|
|
298
273
|
|
|
299
|
-
<% if (roleArn) { %>
|
|
300
|
-
export ROLE_ARN="<%= roleArn %>"
|
|
301
|
-
<% } else { %>
|
|
302
|
-
# IAM execution role for SageMaker (uncomment to override bootstrap role)
|
|
303
|
-
# export ROLE_ARN=""
|
|
304
|
-
<% } %>
|
|
305
|
-
|
|
306
274
|
<% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
|
|
307
275
|
# SageMaker AI Benchmarking configuration
|
|
308
276
|
export BENCHMARK_CONCURRENCY="<%= benchmarkConcurrency %>"
|
|
@@ -353,7 +321,6 @@ export BASE_IMAGE=${BASE_IMAGE:-<%= baseImage || '' %>}
|
|
|
353
321
|
<% if ((deploymentTarget === 'realtime-inference' && !(typeof existingEndpointName !== 'undefined' && existingEndpointName)) || deploymentTarget === 'async-inference' || deploymentTarget === 'batch-transform') { %>
|
|
354
322
|
export INSTANCE_TYPE=${INSTANCE_TYPE:-<%= instanceType %>}
|
|
355
323
|
<% } %>
|
|
356
|
-
export ECR_REPOSITORY_NAME=${ECR_REPOSITORY_NAME:-ml-container-creator}
|
|
357
324
|
|
|
358
325
|
# Print configuration summary
|
|
359
326
|
echo "⚙️ Configuration loaded"
|
|
@@ -38,6 +38,21 @@ done
|
|
|
38
38
|
# Source configuration
|
|
39
39
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
40
40
|
source "${SCRIPT_DIR}/config"
|
|
41
|
+
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
42
|
+
|
|
43
|
+
# ── Profile-resolved variables (env var > profile > default) ──────────────────
|
|
44
|
+
# Disable unbound-variable checking for associative array access (bash 3.2 compat)
|
|
45
|
+
set +u
|
|
46
|
+
ROLE_ARN="${ROLE_ARN:-${_PROFILE[roleArn]:-}}"
|
|
47
|
+
ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
|
|
48
|
+
export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
|
|
49
|
+
|
|
50
|
+
# Async-specific derived variables
|
|
51
|
+
_ASYNC_BUCKET="${_PROFILE[asyncS3Bucket]:-mlcc-async-${_PROFILE[accountId]:-unknown}-${_PROFILE[awsRegion]:-us-east-1}}"
|
|
52
|
+
ASYNC_S3_OUTPUT_PATH="${ASYNC_S3_OUTPUT_PATH:-s3://${_ASYNC_BUCKET}/${PROJECT_NAME}/output/}"
|
|
53
|
+
ASYNC_SNS_SUCCESS_TOPIC="${ASYNC_SNS_SUCCESS_TOPIC:-arn:aws:sns:${_PROFILE[awsRegion]:-us-east-1}:${_PROFILE[accountId]:-unknown}:ml-container-creator-${PROJECT_NAME}-async-success}"
|
|
54
|
+
ASYNC_SNS_ERROR_TOPIC="${ASYNC_SNS_ERROR_TOPIC:-arn:aws:sns:${_PROFILE[awsRegion]:-us-east-1}:${_PROFILE[accountId]:-unknown}:ml-container-creator-${PROJECT_NAME}-async-error}"
|
|
55
|
+
set -u
|
|
41
56
|
|
|
42
57
|
echo "🚀 Deploying to AWS"
|
|
43
58
|
echo " Project: ${PROJECT_NAME}"
|
|
@@ -137,16 +152,31 @@ source "${SCRIPT_DIR}/lib/wait.sh"
|
|
|
137
152
|
# Resolve container secrets (HF_TOKEN, NGC_API_KEY)
|
|
138
153
|
resolve_secrets
|
|
139
154
|
|
|
155
|
+
<% if (typeof serverEnvVars !== 'undefined' && serverEnvVars && Object.keys(serverEnvVars).length > 0) { %>
|
|
156
|
+
# ============================================================
|
|
157
|
+
# Inject server environment variables into container Environment
|
|
158
|
+
# ============================================================
|
|
159
|
+
<% Object.keys(serverEnvVars).forEach(function(key) { %>
|
|
160
|
+
if [ -n "${<%= key %>:-}" ]; then
|
|
161
|
+
if [ -n "${CONTAINER_ENV_JSON}" ]; then
|
|
162
|
+
CONTAINER_ENV_JSON="${CONTAINER_ENV_JSON},\"<%= key %>\":\"${<%= key %>}\""
|
|
163
|
+
else
|
|
164
|
+
CONTAINER_ENV_JSON="\"<%= key %>\":\"${<%= key %>}\""
|
|
165
|
+
fi
|
|
166
|
+
fi
|
|
167
|
+
<% }); %>
|
|
168
|
+
<% } %>
|
|
169
|
+
|
|
140
170
|
# Validate execution role ARN
|
|
141
171
|
if [ -z "${ROLE_ARN:-}" ]; then
|
|
142
|
-
echo "❌
|
|
172
|
+
echo "❌ ROLE_ARN is not set."
|
|
173
|
+
echo " Run 'ml-container-creator bootstrap' to configure your profile,"
|
|
174
|
+
echo " or set ROLE_ARN as an environment variable."
|
|
143
175
|
echo ""
|
|
144
176
|
echo "Usage:"
|
|
145
177
|
echo " export ROLE_ARN=arn:aws:iam::ACCOUNT_ID:role/YOUR_ROLE"
|
|
146
178
|
echo " ./do/deploy"
|
|
147
179
|
echo ""
|
|
148
|
-
echo "Or set ROLE_ARN in do/config"
|
|
149
|
-
echo ""
|
|
150
180
|
echo "The execution role must have permissions for:"
|
|
151
181
|
echo " • SageMaker model and endpoint management"
|
|
152
182
|
echo " • ECR image access"
|
|
@@ -38,6 +38,20 @@ done
|
|
|
38
38
|
# Source configuration
|
|
39
39
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
40
40
|
source "${SCRIPT_DIR}/config"
|
|
41
|
+
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
42
|
+
|
|
43
|
+
# ── Profile-resolved variables (env var > profile > default) ──────────────────
|
|
44
|
+
# Disable unbound-variable checking for associative array access (bash 3.2 compat)
|
|
45
|
+
set +u
|
|
46
|
+
ROLE_ARN="${ROLE_ARN:-${_PROFILE[roleArn]:-}}"
|
|
47
|
+
ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
|
|
48
|
+
export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
|
|
49
|
+
|
|
50
|
+
# Batch-specific derived variables
|
|
51
|
+
_BATCH_BUCKET="${_PROFILE[batchS3Bucket]:-mlcc-batch-${_PROFILE[accountId]:-unknown}-${_PROFILE[awsRegion]:-us-east-1}}"
|
|
52
|
+
BATCH_INPUT_PATH="${BATCH_INPUT_PATH:-s3://${_BATCH_BUCKET}/${PROJECT_NAME}/input/}"
|
|
53
|
+
BATCH_OUTPUT_PATH="${BATCH_OUTPUT_PATH:-s3://${_BATCH_BUCKET}/${PROJECT_NAME}/output/}"
|
|
54
|
+
set -u
|
|
41
55
|
|
|
42
56
|
echo "🚀 Deploying to AWS"
|
|
43
57
|
echo " Project: ${PROJECT_NAME}"
|
|
@@ -135,16 +149,31 @@ source "${SCRIPT_DIR}/lib/wait.sh"
|
|
|
135
149
|
# Resolve container secrets (HF_TOKEN, NGC_API_KEY)
|
|
136
150
|
resolve_secrets
|
|
137
151
|
|
|
152
|
+
<% if (typeof serverEnvVars !== 'undefined' && serverEnvVars && Object.keys(serverEnvVars).length > 0) { %>
|
|
153
|
+
# ============================================================
|
|
154
|
+
# Inject server environment variables into container Environment
|
|
155
|
+
# ============================================================
|
|
156
|
+
<% Object.keys(serverEnvVars).forEach(function(key) { %>
|
|
157
|
+
if [ -n "${<%= key %>:-}" ]; then
|
|
158
|
+
if [ -n "${CONTAINER_ENV_JSON}" ]; then
|
|
159
|
+
CONTAINER_ENV_JSON="${CONTAINER_ENV_JSON},\"<%= key %>\":\"${<%= key %>}\""
|
|
160
|
+
else
|
|
161
|
+
CONTAINER_ENV_JSON="\"<%= key %>\":\"${<%= key %>}\""
|
|
162
|
+
fi
|
|
163
|
+
fi
|
|
164
|
+
<% }); %>
|
|
165
|
+
<% } %>
|
|
166
|
+
|
|
138
167
|
# Validate execution role ARN
|
|
139
168
|
if [ -z "${ROLE_ARN:-}" ]; then
|
|
140
|
-
echo "❌
|
|
169
|
+
echo "❌ ROLE_ARN is not set."
|
|
170
|
+
echo " Run 'ml-container-creator bootstrap' to configure your profile,"
|
|
171
|
+
echo " or set ROLE_ARN as an environment variable."
|
|
141
172
|
echo ""
|
|
142
173
|
echo "Usage:"
|
|
143
174
|
echo " export ROLE_ARN=arn:aws:iam::ACCOUNT_ID:role/YOUR_ROLE"
|
|
144
175
|
echo " ./do/deploy"
|
|
145
176
|
echo ""
|
|
146
|
-
echo "Or set ROLE_ARN in do/config"
|
|
147
|
-
echo ""
|
|
148
177
|
echo "The execution role must have permissions for:"
|
|
149
178
|
echo " • SageMaker model and transform job management"
|
|
150
179
|
echo " • ECR image access"
|
|
@@ -38,6 +38,13 @@ done
|
|
|
38
38
|
# Source configuration
|
|
39
39
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
40
40
|
source "${SCRIPT_DIR}/config"
|
|
41
|
+
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
42
|
+
|
|
43
|
+
# ── Profile-resolved variables (env var > profile > default) ──────────────────
|
|
44
|
+
# Disable unbound-variable checking for associative array access (bash 3.2 compat)
|
|
45
|
+
set +u
|
|
46
|
+
export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
|
|
47
|
+
set -u
|
|
41
48
|
|
|
42
49
|
echo "🚀 Deploying to AWS"
|
|
43
50
|
echo " Project: ${PROJECT_NAME}"
|
|
@@ -211,6 +211,15 @@ _ci_handle_existing_endpoint() {
|
|
|
211
211
|
# Source configuration
|
|
212
212
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
213
213
|
source "${SCRIPT_DIR}/config"
|
|
214
|
+
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
215
|
+
|
|
216
|
+
# ── Profile-resolved variables (env var > profile > default) ──────────────────
|
|
217
|
+
# Disable unbound-variable checking for associative array access (bash 3.2 compat)
|
|
218
|
+
set +u
|
|
219
|
+
ROLE_ARN="${ROLE_ARN:-${_PROFILE[roleArn]:-}}"
|
|
220
|
+
ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
|
|
221
|
+
export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
|
|
222
|
+
set -u
|
|
214
223
|
|
|
215
224
|
echo "🚀 Deploying to AWS"
|
|
216
225
|
echo " Project: ${PROJECT_NAME}"
|
|
@@ -307,14 +316,14 @@ source "${SCRIPT_DIR}/lib/inference-component.sh"
|
|
|
307
316
|
|
|
308
317
|
# Validate execution role ARN
|
|
309
318
|
if [ -z "${ROLE_ARN:-}" ]; then
|
|
310
|
-
echo "❌
|
|
319
|
+
echo "❌ ROLE_ARN is not set."
|
|
320
|
+
echo " Run 'ml-container-creator bootstrap' to configure your profile,"
|
|
321
|
+
echo " or set ROLE_ARN as an environment variable."
|
|
311
322
|
echo ""
|
|
312
323
|
echo "Usage:"
|
|
313
324
|
echo " export ROLE_ARN=arn:aws:iam::ACCOUNT_ID:role/YOUR_ROLE"
|
|
314
325
|
echo " ./do/deploy"
|
|
315
326
|
echo ""
|
|
316
|
-
echo "Or set ROLE_ARN in do/config"
|
|
317
|
-
echo ""
|
|
318
327
|
echo "The execution role must have permissions for:"
|
|
319
328
|
echo " • SageMaker endpoint and inference component management"
|
|
320
329
|
echo " • ECR image access"
|
|
@@ -350,6 +359,21 @@ fi
|
|
|
350
359
|
# Resolve container secrets (HF_TOKEN, NGC_API_KEY)
|
|
351
360
|
resolve_secrets
|
|
352
361
|
|
|
362
|
+
<% if (typeof serverEnvVars !== 'undefined' && serverEnvVars && Object.keys(serverEnvVars).length > 0) { %>
|
|
363
|
+
# ============================================================
|
|
364
|
+
# Inject server environment variables into container Environment
|
|
365
|
+
# ============================================================
|
|
366
|
+
<% Object.keys(serverEnvVars).forEach(function(key) { %>
|
|
367
|
+
if [ -n "${<%= key %>:-}" ]; then
|
|
368
|
+
if [ -n "${CONTAINER_ENV_JSON}" ]; then
|
|
369
|
+
CONTAINER_ENV_JSON="${CONTAINER_ENV_JSON},\"<%= key %>\":\"${<%= key %>}\""
|
|
370
|
+
else
|
|
371
|
+
CONTAINER_ENV_JSON="\"<%= key %>\":\"${<%= key %>}\""
|
|
372
|
+
fi
|
|
373
|
+
fi
|
|
374
|
+
<% }); %>
|
|
375
|
+
<% } %>
|
|
376
|
+
|
|
353
377
|
# ============================================================
|
|
354
378
|
# CI-Mode: Idempotent deployment check (runs before normal idempotency)
|
|
355
379
|
# ============================================================
|
|
@@ -165,7 +165,7 @@ create_endpoint_config() {
|
|
|
165
165
|
# Optional: capacity reservation
|
|
166
166
|
if [ -n "${CAPACITY_RESERVATION_ARN:-}" ]; then
|
|
167
167
|
variant_json="${variant_json},\"CapacityReservationConfig\":{\"CapacityReservationPreference\":\"capacity-reservations-only\",\"MlReservationArn\":\"${CAPACITY_RESERVATION_ARN}\"}"
|
|
168
|
-
echo "
|
|
168
|
+
echo " ✓ Capacity reservation: ${CAPACITY_RESERVATION_ARN}"
|
|
169
169
|
fi
|
|
170
170
|
|
|
171
171
|
variant_json="${variant_json}}]"
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# Profile loader — reads the active bootstrap profile into the _PROFILE[] associative array.
# Source this file after do/config. Values provide defaults; explicit env vars take precedence.
#
# Data source: ~/.ml-container-creator/config.json → profiles[activeProfile].
# Only scalar values (string / number / boolean) are loaded; booleans are stored
# with their Python spelling ("True" / "False").
#
# Requires bash 4+ for associative array support.
# macOS ships with bash 3.2 — install bash 4+ via Homebrew: brew install bash
# When `declare -A` is not available the profile is NOT loaded and a warning is
# printed. Without an associative array, string subscripts are evaluated as
# arithmetic expressions, so every key would land on the same element and
# lookups such as ${_PROFILE[roleArn]} would return an unrelated value.
#
# Expected keys in _PROFILE:
#   awsRegion, accountId, awsProfile, roleArn, ecrRepositoryName,
#   benchmarkS3Bucket, ciBenchmarkResultsBucket, asyncS3Bucket, batchS3Bucket,
#   ciTableName, ciInfraProvisioned

# Temporarily disable unbound variable checking for profile loading
# (keys may not exist in the profile config, and declare -A behavior
# varies across bash versions with set -u)
set +u 2>/dev/null || true

_PROFILE_RAW=""
if declare -A _PROFILE 2>/dev/null; then
    if command -v python3 &>/dev/null; then
        # Any failure (missing file, bad JSON, unknown active profile) yields an
        # empty result so callers silently fall back to env vars / defaults.
        _PROFILE_RAW=$(python3 -c "
import json, os
try:
    with open(os.path.expanduser('~/.ml-container-creator/config.json')) as f:
        c = json.load(f)
    p = c['profiles'][c['activeProfile']]
    # Output as KEY=VALUE lines (simple, no JSON parsing in bash)
    for k, v in p.items():
        if isinstance(v, (str, int, float, bool)):
            print(f'{k}={v}')
except Exception:
    pass
" 2>/dev/null) || _PROFILE_RAW=""
    fi

    if [ -n "${_PROFILE_RAW}" ]; then
        # Split on the FIRST '=' only, so values that contain or end with '='
        # are preserved verbatim. Loop variables are prefixed to avoid
        # clobbering names used by the sourcing script.
        while IFS= read -r _profile_line; do
            _profile_key="${_profile_line%%=*}"
            if [ "${_profile_key}" = "${_profile_line}" ]; then
                _profile_value=""          # line had no '=' at all
            else
                _profile_value="${_profile_line#*=}"
            fi
            if [ -n "${_profile_key}" ]; then
                _PROFILE["${_profile_key}"]="${_profile_value}"
            fi
        done <<< "${_PROFILE_RAW}"
        unset _profile_line _profile_key _profile_value
    fi
else
    echo "⚠️  Could not create the _PROFILE associative array (bash 4+ required) — profile not loaded; using environment variables and defaults" >&2
fi

# NOTE: set -u is NOT re-enabled here. The caller is responsible for managing
# their own shell options. Re-enabling set -u would cause "unbound variable"
# errors when accessing _PROFILE keys on bash versions where empty associative
# arrays are treated as unset (bash 5.x on some platforms).
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Shared helper: read/write the .mlcc/staged-assets.json tracking file.
|
|
5
|
+
# Sourced by do/stage, do/submit, and other lifecycle scripts.
|
|
6
|
+
#
|
|
7
|
+
# ─── Schema (.mlcc/staged-assets.json) ───────────────────────────────────────
|
|
8
|
+
#
|
|
9
|
+
# {
|
|
10
|
+
# "version": "1",
|
|
11
|
+
# "models": {
|
|
12
|
+
# "<ic-name>": {
|
|
13
|
+
# "source": "<HuggingFace model ID, e.g. google/gemma-4-31B-it>",
|
|
14
|
+
# "staged_uri": "<S3 URI with trailing slash>",
|
|
15
|
+
# "staged_at": "<ISO 8601 timestamp>",
|
|
16
|
+
# "region": "<AWS region where the model was staged>",
|
|
17
|
+
# "size_gb": <numeric size in GB>
|
|
18
|
+
# }
|
|
19
|
+
# },
|
|
20
|
+
# "adapters": {}
|
|
21
|
+
# }
|
|
22
|
+
#
|
|
23
|
+
# Notes:
|
|
24
|
+
# - "version" is for forward-compatible schema evolution (start at "1")
|
|
25
|
+
# - "models" is keyed by IC name; use "default" for single-model projects
|
|
26
|
+
# - "adapters" is reserved for future LoRA adapter staging (BL-122)
|
|
27
|
+
# - This file is git-ignored (.mlcc/ contains account-specific URIs)
|
|
28
|
+
# - The file SHALL NOT be created unless a valid staging operation completes
|
|
29
|
+
# ──────────────────────────────────────────────────────────────────────────────
|
|
30
|
+
|
|
31
|
+
# Path to the staged-assets file (relative to the current working directory, which is expected to be the project root)
|
|
32
|
+
STAGED_ASSETS_DIR=".mlcc"
|
|
33
|
+
STAGED_ASSETS_FILE="${STAGED_ASSETS_DIR}/staged-assets.json"
|
|
34
|
+
|
|
35
|
+
# _staged_assets_has_jq()
#   Probe for a `jq` command in the current shell environment.
#   Exit status: 0 when `command -v jq` resolves, non-zero otherwise.
#   Produces no output on stdout or stderr.
_staged_assets_has_jq() {
    command -v jq >/dev/null 2>&1
}
|
|
41
|
+
|
|
42
|
+
# _staged_assets_warn_no_jq()
#   Emit the "jq not found" notice on stderr, at most once per shell.
#   The _STAGED_ASSETS_JQ_WARNED flag records that the notice was shown;
#   later calls are no-ops.
_staged_assets_warn_no_jq() {
    # Already warned in this shell — nothing to do.
    [ -n "${_STAGED_ASSETS_JQ_WARNED:-}" ] && return 0

    echo "⚠️ jq not found — using fallback parser (install jq for full functionality)" >&2
    _STAGED_ASSETS_JQ_WARNED=1
}
|
|
50
|
+
|
|
51
|
+
# staged_assets_read_model_uri()
#   Look up `.models.default.staged_uri` in ${STAGED_ASSETS_FILE}.
#
#   With jq installed the value is read with a jq query; otherwise a
#   grep/sed scan of the lines following the "default" key is used.
#
#   Arguments: none
#   Output:    the S3 URI on stdout, or an empty line when the file is
#              missing or the lookup yields nothing
staged_assets_read_model_uri() {
    local staged_uri=""

    if [ -f "${STAGED_ASSETS_FILE}" ]; then
        if ! _staged_assets_has_jq; then
            _staged_assets_warn_no_jq
            # Text fallback: take the first "staged_uri" that appears within
            # five lines after the "default" key (single-model layout).
            staged_uri=$(grep -A 5 '"default"' "${STAGED_ASSETS_FILE}" 2>/dev/null \
                | grep '"staged_uri"' \
                | sed 's/.*"staged_uri"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/' \
                | head -1) || staged_uri=""
        else
            staged_uri=$(jq -r '.models.default.staged_uri // empty' "${STAGED_ASSETS_FILE}" 2>/dev/null) || staged_uri=""
        fi
    fi

    echo "${staged_uri}"
}
|
|
82
|
+
|
|
83
|
+
# _staged_assets_json_escape()
#   Print $1 with backslashes and double quotes escaped so it can be placed
#   inside a JSON string literal. Control characters are not handled; the
#   jq code path should be preferred for arbitrary input.
_staged_assets_json_escape() {
    local text="$1"
    text=${text//\\/\\\\}
    text=${text//\"/\\\"}
    printf '%s' "${text}"
}

# staged_assets_write_model()
#   Create or update the staged-assets file with model staging information.
#   Creates the ${STAGED_ASSETS_DIR} directory if it does not exist.
#
#   The new content is always written to "${STAGED_ASSETS_FILE}.tmp" first and
#   moved into place only on success, so a failed write never leaves an empty
#   or truncated staged-assets file behind.
#
#   With jq: the "default" model entry is merged into an existing, non-empty
#   file (other keys are preserved) or a fresh document is created.
#   Without jq: the file is rewritten from scratch (create-only, no merge).
#
#   Arguments:
#     $1 - source:  HuggingFace model ID (e.g. "google/gemma-4-31B-it")
#     $2 - uri:     S3 URI where the model was staged (with trailing slash)
#     $3 - region:  AWS region where the model was staged
#     $4 - size_gb: Total size of the staged model in GB (numeric); a missing
#                   or non-numeric value is recorded as 0 with a warning
#   Returns: 0 on success, non-zero if the file could not be written.
staged_assets_write_model() {
    local source="$1"
    local uri="$2"
    local region="$3"
    local size_gb="${4:-}"
    local timestamp
    local tmp_file="${STAGED_ASSETS_FILE}.tmp"
    local rc=0
    # JSON number grammar — what both `jq --argjson` and a raw JSON value need.
    local json_number_re='^-?(0|[1-9][0-9]*)([.][0-9]+)?([eE][-+]?[0-9]+)?$'
    timestamp=$(date -u +%Y-%m-%dT%H:%M:%SZ)

    if ! [[ "${size_gb}" =~ ${json_number_re} ]]; then
        echo "⚠️  staged_assets_write_model: size_gb '${size_gb}' is not a number — recording 0" >&2
        size_gb=0
    fi

    # Ensure the .mlcc directory exists
    mkdir -p "${STAGED_ASSETS_DIR}" || return 1

    if _staged_assets_has_jq; then
        local -a jq_args=(
            --arg source "${source}"
            --arg uri "${uri}"
            --arg ts "${timestamp}"
            --arg region "${region}"
            --argjson size "${size_gb}"
        )
        local set_default='.models.default = {
            "source": $source,
            "staged_uri": $uri,
            "staged_at": $ts,
            "region": $region,
            "size_gb": $size
        }'

        if [ -s "${STAGED_ASSETS_FILE}" ]; then
            # Update existing file — merge the new model entry
            jq "${jq_args[@]}" "${set_default}" "${STAGED_ASSETS_FILE}" > "${tmp_file}" || rc=$?
        else
            # Create new file from scratch (also covers a zero-byte leftover)
            jq -n "${jq_args[@]}" \
                "{\"version\": \"1\", \"models\": {}, \"adapters\": {}} | ${set_default}" \
                > "${tmp_file}" || rc=$?
        fi
    else
        _staged_assets_warn_no_jq
        # Fallback: write the JSON directly (create-only, no merge support without jq)
        local esc_source esc_uri esc_region
        esc_source=$(_staged_assets_json_escape "${source}")
        esc_uri=$(_staged_assets_json_escape "${uri}")
        esc_region=$(_staged_assets_json_escape "${region}")
        cat > "${tmp_file}" << EOF || rc=$?
{
  "version": "1",
  "models": {
    "default": {
      "source": "${esc_source}",
      "staged_uri": "${esc_uri}",
      "staged_at": "${timestamp}",
      "region": "${esc_region}",
      "size_gb": ${size_gb}
    }
  },
  "adapters": {}
}
EOF
    fi

    if [ "${rc}" -ne 0 ]; then
        rm -f "${tmp_file}"
        echo "❌ Failed to write ${STAGED_ASSETS_FILE}" >&2
        return "${rc}"
    fi

    if ! mv "${tmp_file}" "${STAGED_ASSETS_FILE}"; then
        rm -f "${tmp_file}"
        return 1
    fi
}
|
|
160
|
+
|
|
161
|
+
# staged_assets_status()
#   Print a human-readable summary of ${STAGED_ASSETS_FILE}.
#
#   With jq: a table of every entry under .models (IC name, source, S3 URI,
#   region, size) followed by any entries under .adapters.
#   Without jq: the raw file contents.
#   When the file does not exist, a hint to run do/stage is printed instead.
#
#   Arguments: none
#   Output:    formatted text on stdout
#   Returns:   0
staged_assets_status() {
    if [ ! -f "${STAGED_ASSETS_FILE}" ]; then
        echo "No staged assets found."
        echo " Run do/stage to stage model weights to S3."
        return 0
    fi

    echo "Staged Assets (.mlcc/staged-assets.json)"
    echo "─────────────────────────────────────────────────────────────────"

    if _staged_assets_has_jq; then
        # Print models section
        local model_count
        model_count=$(jq -r '.models | length' "${STAGED_ASSETS_FILE}" 2>/dev/null) || model_count=0
        # jq can exit 0 yet print nothing (empty file) or a non-integer;
        # normalise so the numeric comparison below never errors out.
        [[ "${model_count}" =~ ^[0-9]+$ ]] || model_count=0

        if [ "${model_count}" -gt 0 ]; then
            echo ""
            echo " Models:"
            echo " ┌──────────────┬─────────────────────────────────┬──────────────────────────────────────────────────────┬────────────┬─────────┐"
            printf " │ %-12s │ %-31s │ %-52s │ %-10s │ %-7s │\n" "IC Name" "Source" "S3 URI" "Region" "Size"
            echo " ├──────────────┼─────────────────────────────────┼──────────────────────────────────────────────────────┼────────────┼─────────┤"

            jq -r '.models | to_entries[] | "\(.key)\t\(.value.source)\t\(.value.staged_uri)\t\(.value.region)\t\(.value.size_gb)"' "${STAGED_ASSETS_FILE}" 2>/dev/null | \
            while IFS=$'\t' read -r ic_name source staged_uri region size_gb; do
                printf " │ %-12s │ %-31s │ %-52s │ %-10s │ %5s GB│\n" \
                    "${ic_name}" "${source}" "${staged_uri}" "${region}" "${size_gb}"
            done

            echo " └──────────────┴─────────────────────────────────┴──────────────────────────────────────────────────────┴────────────┴─────────┘"
        fi

        # Print adapters section (future — show placeholder if empty)
        local adapter_count
        adapter_count=$(jq -r '.adapters | length' "${STAGED_ASSETS_FILE}" 2>/dev/null) || adapter_count=0
        [[ "${adapter_count}" =~ ^[0-9]+$ ]] || adapter_count=0

        if [ "${adapter_count}" -gt 0 ]; then
            echo ""
            echo " Adapters:"
            jq -r '.adapters | to_entries[] | " \(.key): \(.value.staged_uri // "not staged")"' "${STAGED_ASSETS_FILE}" 2>/dev/null
        fi
    else
        _staged_assets_warn_no_jq
        # Fallback: basic display without jq
        echo ""
        echo " Raw contents:"
        echo ""
        cat "${STAGED_ASSETS_FILE}"
    fi

    echo ""
}
|
package/templates/do/push
CHANGED
|
@@ -9,6 +9,14 @@ set -o pipefail
|
|
|
9
9
|
# Source configuration
|
|
10
10
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
11
11
|
source "${SCRIPT_DIR}/config"
|
|
12
|
+
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
13
|
+
|
|
14
|
+
# ── Profile-resolved variables (env var > profile > default) ──────────────────
|
|
15
|
+
# Disable unbound-variable checking for associative array access (bash 3.2 compat)
|
|
16
|
+
set +u
|
|
17
|
+
ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
|
|
18
|
+
export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
|
|
19
|
+
set -u
|
|
12
20
|
|
|
13
21
|
echo "🚀 Pushing Docker image to Amazon ECR"
|
|
14
22
|
echo " Project: ${PROJECT_NAME}"
|
package/templates/do/register
CHANGED
|
@@ -9,6 +9,14 @@ set -o pipefail
|
|
|
9
9
|
# Source configuration
|
|
10
10
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
11
11
|
source "${SCRIPT_DIR}/config"
|
|
12
|
+
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
13
|
+
|
|
14
|
+
# ── Profile-resolved variables (env var > profile > default) ──────────────────
|
|
15
|
+
# Disable unbound-variable checking for associative array access (bash 3.2 compat)
|
|
16
|
+
set +u
|
|
17
|
+
ROLE_ARN="${ROLE_ARN:-${_PROFILE[roleArn]:-}}"
|
|
18
|
+
ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
|
|
19
|
+
set -u
|
|
12
20
|
|
|
13
21
|
# ============================================================
|
|
14
22
|
# Register deployment to the deployment registry
|