@aws/ml-container-creator 0.10.0 → 0.12.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE-THIRD-PARTY +9304 -0
- package/bin/cli.js +2 -0
- package/config/bootstrap-e2e-stack.json +341 -0
- package/config/bootstrap-stack.json +40 -3
- package/config/parameter-schema-v2.json +33 -22
- package/config/tune-catalog.json +1781 -0
- package/infra/ci-harness/buildspec.yml +1 -0
- package/infra/ci-harness/lambda/path-prover/brain.ts +306 -0
- package/infra/ci-harness/lambda/path-prover/write-results.ts +152 -0
- package/infra/ci-harness/lib/ci-harness-stack.ts +851 -7
- package/infra/ci-harness/state-machines/path-prover.asl.json +496 -0
- package/package.json +53 -67
- package/servers/base-image-picker/index.js +121 -121
- package/servers/e2e-status/index.js +297 -0
- package/servers/e2e-status/manifest.json +14 -0
- package/servers/e2e-status/package.json +15 -0
- package/servers/endpoint-picker/LICENSE +202 -0
- package/servers/endpoint-picker/index.js +536 -0
- package/servers/endpoint-picker/manifest.json +14 -0
- package/servers/endpoint-picker/package.json +18 -0
- package/servers/hyperpod-cluster-picker/index.js +125 -125
- package/servers/instance-sizer/index.js +166 -153
- package/servers/instance-sizer/lib/instance-ranker.js +120 -76
- package/servers/instance-sizer/lib/model-resolver.js +61 -61
- package/servers/instance-sizer/lib/quota-resolver.js +113 -113
- package/servers/instance-sizer/lib/vram-estimator.js +31 -31
- package/servers/lib/bedrock-client.js +38 -38
- package/servers/lib/catalogs/instances.json +27 -0
- package/servers/lib/catalogs/model-servers.json +201 -3
- package/servers/lib/custom-validators.js +13 -13
- package/servers/lib/dynamic-resolver.js +4 -4
- package/servers/marketplace-picker/index.js +342 -0
- package/servers/marketplace-picker/manifest.json +14 -0
- package/servers/marketplace-picker/package.json +18 -0
- package/servers/model-picker/index.js +382 -382
- package/servers/region-picker/index.js +56 -56
- package/servers/workload-picker/LICENSE +202 -0
- package/servers/workload-picker/catalogs/workload-profiles.json +67 -0
- package/servers/workload-picker/index.js +171 -0
- package/servers/workload-picker/manifest.json +16 -0
- package/servers/workload-picker/package.json +16 -0
- package/src/app.js +12 -3
- package/src/lib/bootstrap-command-handler.js +609 -15
- package/src/lib/bootstrap-config.js +36 -0
- package/src/lib/bootstrap-profile-manager.js +48 -41
- package/src/lib/ci-register-helpers.js +74 -0
- package/src/lib/config-loader.js +3 -0
- package/src/lib/config-manager.js +7 -0
- package/src/lib/config-validator.js +1 -1
- package/src/lib/cuda-resolver.js +17 -8
- package/src/lib/generated/cli-options.js +319 -314
- package/src/lib/generated/parameter-matrix.js +672 -661
- package/src/lib/generated/validation-rules.js +76 -72
- package/src/lib/path-prover-brain.js +664 -0
- package/src/lib/prompts/infrastructure-prompts.js +2 -2
- package/src/lib/prompts/model-prompts.js +6 -0
- package/src/lib/prompts/project-prompts.js +12 -0
- package/src/lib/secrets-prompt-runner.js +4 -0
- package/src/lib/template-manager.js +1 -1
- package/src/lib/template-variable-resolver.js +87 -1
- package/src/lib/tune-catalog-validator.js +37 -4
- package/templates/Dockerfile +9 -0
- package/templates/code/adapter_sidecar.py +444 -0
- package/templates/code/serve +6 -0
- package/templates/code/serve.d/vllm.ejs +1 -1
- package/templates/do/.benchmark_writer.py +1476 -0
- package/templates/do/.tune_helper.py +982 -57
- package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
- package/templates/do/adapter +154 -0
- package/templates/do/benchmark +639 -85
- package/templates/do/build +5 -0
- package/templates/do/clean.d/async-inference.ejs +5 -0
- package/templates/do/clean.d/batch-transform.ejs +5 -0
- package/templates/do/clean.d/hyperpod-eks.ejs +5 -0
- package/templates/do/clean.d/managed-inference.ejs +5 -0
- package/templates/do/config +115 -45
- package/templates/do/deploy.d/async-inference.ejs +30 -3
- package/templates/do/deploy.d/batch-transform.ejs +29 -3
- package/templates/do/deploy.d/hyperpod-eks.ejs +4 -0
- package/templates/do/deploy.d/managed-inference.ejs +216 -14
- package/templates/do/lib/endpoint-config.sh +1 -1
- package/templates/do/lib/profile.sh +44 -0
- package/templates/do/optimize +106 -37
- package/templates/do/push +5 -0
- package/templates/do/register +94 -0
- package/templates/do/stage +567 -0
- package/templates/do/submit +7 -0
- package/templates/do/test +14 -0
- package/templates/do/tune +382 -59
- package/templates/do/validate +44 -4
|
@@ -10,9 +10,11 @@ set -o pipefail
|
|
|
10
10
|
FORCE_NEW=false
|
|
11
11
|
FORCE_IC=false
|
|
12
12
|
IC_TARGET=""
|
|
13
|
+
CI_FLAG=false
|
|
13
14
|
while [ $# -gt 0 ]; do
|
|
14
15
|
case "$1" in
|
|
15
16
|
--force) FORCE_NEW=true; shift ;;
|
|
17
|
+
--ci) CI_FLAG=true; shift ;;
|
|
16
18
|
--force-ic)
|
|
17
19
|
FORCE_IC=true
|
|
18
20
|
shift
|
|
@@ -32,13 +34,14 @@ while [ $# -gt 0 ]; do
|
|
|
32
34
|
shift 2
|
|
33
35
|
;;
|
|
34
36
|
--help|-h)
|
|
35
|
-
echo "Usage: ./do/deploy [--force] [--force-ic [<name>]] [--ic <name>]"
|
|
37
|
+
echo "Usage: ./do/deploy [--force] [--force-ic [<name>]] [--ic <name>] [--ci]"
|
|
36
38
|
echo ""
|
|
37
39
|
echo "Options:"
|
|
38
40
|
echo " --force Create a new endpoint and IC, even if one already exists."
|
|
39
41
|
echo " --force-ic Recreate ALL inference components on the existing endpoint."
|
|
40
42
|
echo " --force-ic <name> Recreate only the named IC on the existing endpoint."
|
|
41
43
|
echo " --ic <name> Deploy only the named IC (from do/ic/<name>.conf)."
|
|
44
|
+
echo " --ci Enable CI mode (structured errors, timeouts, idempotency)."
|
|
42
45
|
echo ""
|
|
43
46
|
echo "Without flags, deploy resumes from the last run."
|
|
44
47
|
exit 0
|
|
@@ -51,9 +54,169 @@ while [ $# -gt 0 ]; do
|
|
|
51
54
|
esac
|
|
52
55
|
done
|
|
53
56
|
|
|
57
|
+
# ============================================================
|
|
58
|
+
# CI-Mode Detection and Configuration
|
|
59
|
+
# ============================================================
|
|
60
|
+
# CI mode is activated by CI_MODE=true env var OR --ci flag
|
|
61
|
+
if [ "${CI_MODE:-false}" = "true" ] || [ "${CI_FLAG}" = "true" ]; then
|
|
62
|
+
CI_ACTIVE=true
|
|
63
|
+
else
|
|
64
|
+
CI_ACTIVE=false
|
|
65
|
+
fi
|
|
66
|
+
|
|
67
|
+
# CI-mode timeout configuration (seconds)
|
|
68
|
+
if [ "${CI_ACTIVE}" = "true" ]; then
|
|
69
|
+
CI_DEPLOY_TIMEOUT="${CI_DEPLOY_TIMEOUT_SECONDS:-1200}"
|
|
70
|
+
CI_DEPLOY_START=$(date +%s)
|
|
71
|
+
fi
|
|
72
|
+
|
|
73
|
+
# _ci_json_escape <string>
# Prints <string> escaped for use inside a JSON string literal: backslash and
# double quote are backslash-escaped, newline/CR/tab become \n, \r, \t, and any
# remaining ASCII control bytes are dropped.
_ci_json_escape() {
    local s="$1"
    s=${s//\\/\\\\}
    s=${s//\"/\\\"}
    s=${s//$'\n'/\\n}
    s=${s//$'\r'/\\r}
    s=${s//$'\t'/\\t}
    # Control bytes are not legal inside JSON strings; the ones worth keeping
    # were turned into printable escapes above, so the rest can be removed.
    printf '%s' "${s}" | tr -d '\000-\037'
}

# _ci_emit_error <error_message> <error_type> <retryable>
# Reports a fatal error and terminates the script with exit status 1.
#   CI mode (CI_ACTIVE=true): prints a single-line JSON object on stdout with
#     the keys error, error_type, instance_type, region, retryable and
#     elapsed_seconds.
#   Non-CI mode: prints a human-readable "❌ <message>" line.
# Globals read: CI_ACTIVE, CI_DEPLOY_START, INSTANCE_TYPE, AWS_REGION.
# <retryable> must be "true" or "false"; anything else is emitted as false so
# the output is always valid JSON.
_ci_emit_error() {
    local error_msg="$1"
    local error_type="$2"
    local retryable="$3"
    local elapsed=0
    local now

    if [ "${CI_ACTIVE}" = "true" ]; then
        now=$(date +%s)
        # CI_DEPLOY_START is only set when CI mode was active at startup;
        # fall back to "now" (elapsed 0) instead of computing from epoch 0.
        elapsed=$(( now - ${CI_DEPLOY_START:-${now}} ))

        case "${retryable}" in
            true|false) ;;
            *) retryable=false ;;
        esac

        # error_msg may contain raw AWS CLI output (quotes, newlines), so every
        # string field is escaped before being placed into the JSON document.
        printf '{"error":"%s","error_type":"%s","instance_type":"%s","region":"%s","retryable":%s,"elapsed_seconds":%s}\n' \
            "$(_ci_json_escape "${error_msg}")" \
            "$(_ci_json_escape "${error_type}")" \
            "$(_ci_json_escape "${INSTANCE_TYPE:-unknown}")" \
            "$(_ci_json_escape "${AWS_REGION:-unknown}")" \
            "${retryable}" \
            "${elapsed}"
        exit 1
    else
        echo "❌ ${error_msg}"
        exit 1
    fi
}
|
|
91
|
+
|
|
92
|
+
# _ci_check_timeout
# Deadline guard for CI runs. Does nothing unless CI_ACTIVE is "true".
# When the seconds elapsed since CI_DEPLOY_START reach CI_DEPLOY_TIMEOUT,
# hands a retryable "timeout" error to _ci_emit_error.
# Globals read: CI_ACTIVE, CI_DEPLOY_START, CI_DEPLOY_TIMEOUT.
_ci_check_timeout() {
    [ "${CI_ACTIVE}" = "true" ] || return 0

    local now
    local spent
    now=$(date +%s)
    spent=$(( now - CI_DEPLOY_START ))

    if [ "${spent}" -ge "${CI_DEPLOY_TIMEOUT}" ]; then
        _ci_emit_error "Deployment timed out after ${spent} seconds (limit: ${CI_DEPLOY_TIMEOUT}s)" "timeout" "true"
    fi
}
|
|
103
|
+
|
|
104
|
+
# _ci_create_endpoint_with_retry <endpoint_name> <endpoint_config_name>
# Calls `aws sagemaker create-endpoint`, retrying on throttling with
# exponential backoff (5s, then 10s; at most 3 attempts in total).
# Returns 0 as soon as a call succeeds. Any failure is routed through
# _ci_emit_error, classified from the CLI output as:
#   throttled  - still throttled on the last attempt (retryable)
#   capacity   - InsufficientInstanceCapacity (retryable)
#   api_error  - anything else (not retryable)
# Globals read: AWS_REGION, INSTANCE_TYPE, CI_ACTIVE.
_ci_create_endpoint_with_retry() {
    local attempt=0
    local max_attempts=3
    local backoff=5
    local ep_name="$1"
    local ep_config="$2"
    local create_output

    while [ "${attempt}" -lt "${max_attempts}" ]; do
        attempt=$(( attempt + 1 ))
        if create_output=$(aws sagemaker create-endpoint \
            --endpoint-name "${ep_name}" \
            --endpoint-config-name "${ep_config}" \
            --region "${AWS_REGION}" 2>&1); then
            return 0
        fi

        # Classify the failure. Here-strings are used instead of `echo | grep -q`
        # so that, under `set -o pipefail`, grep exiting early can never turn a
        # match into a pipeline failure (SIGPIPE on the writer).
        if grep -qi "ThrottlingException" <<< "${create_output}"; then
            if [ "${attempt}" -lt "${max_attempts}" ]; then
                if [ "${CI_ACTIVE}" = "true" ]; then
                    echo "⏳ Throttled (attempt ${attempt}/${max_attempts}), retrying in ${backoff}s..."
                else
                    echo "⏳ Throttled, retrying in ${backoff}s..."
                fi
                sleep "${backoff}"
                backoff=$(( backoff * 2 ))
            else
                _ci_emit_error "CreateEndpoint throttled after ${max_attempts} attempts" "throttled" "true"
            fi
        elif grep -qi "InsufficientInstanceCapacity" <<< "${create_output}"; then
            _ci_emit_error "InsufficientInstanceCapacity: Unable to provision ${INSTANCE_TYPE} in ${AWS_REGION}" "capacity" "true"
        else
            # Other API error
            _ci_emit_error "CreateEndpoint failed: ${create_output}" "api_error" "false"
        fi
    done

    # Only reachable if _ci_emit_error returned instead of exiting; never
    # report success without a created endpoint.
    return 1
}
|
|
143
|
+
|
|
144
|
+
# _ci_handle_existing_endpoint
# Decides, for CI runs, whether the endpoint named by ENDPOINT_NAME can be
# reused as-is.
#   Returns 0 - endpoint is InService and INFERENCE_COMPONENT_NAME is set and
#               also InService; the caller should skip deployment.
#               (Only these statuses are compared; no configuration is diffed.)
#   Returns 1 - a deploy should proceed: no endpoint name, endpoint or component
#               not InService, or the endpoint was Failed/OutOfService and has
#               just been deleted here (ENDPOINT_NAME is cleared in that case).
# While waiting for a deletion it polls every 10s, honours the global CI
# deadline via _ci_check_timeout, and gives up after 300s through
# _ci_emit_error.
# Relies on _get_endpoint_status / _get_ic_status being defined by the time it
# is called.
_ci_handle_existing_endpoint() {
    local target="${ENDPOINT_NAME:-}"
    # Nothing recorded from a previous run: fresh deploy.
    [ -n "${target}" ] || return 1

    local state
    state=$(_get_endpoint_status "${target}" 2>/dev/null || echo "")

    if [ "${state}" = "InService" ]; then
        # Reuse is only possible when a named inference component is healthy too.
        [ -n "${INFERENCE_COMPONENT_NAME:-}" ] || return 1

        local component_state
        component_state=$(_get_ic_status "${INFERENCE_COMPONENT_NAME}" 2>/dev/null || echo "")
        [ "${component_state}" = "InService" ] || return 1

        echo "✅ [CI] Endpoint InService with matching config — skipping deployment"
        echo " Endpoint: ${target}"
        echo " Inference Component: ${INFERENCE_COMPONENT_NAME}"
        return 0
    fi

    # Any state other than the two known-bad ones (including "not found")
    # needs no cleanup.
    if [ "${state}" != "Failed" ] && [ "${state}" != "OutOfService" ]; then
        return 1
    fi

    echo "⚠️ [CI] Endpoint in bad state (${state}): ${target}"
    echo " Deleting endpoint for fresh deployment..."

    # Best effort: a failed delete call is tolerated and the poll below decides.
    aws sagemaker delete-endpoint \
        --endpoint-name "${target}" \
        --region "${AWS_REGION}" 2>/dev/null || true

    local wait_limit=300 # 5 minutes
    local wait_began
    wait_began=$(date +%s)
    local current
    local waited

    while :; do
        _ci_check_timeout
        current=$(_get_endpoint_status "${target}" 2>/dev/null || echo "")
        # An empty status is taken to mean the endpoint no longer exists.
        if [ -z "${current}" ]; then
            echo " ✅ Endpoint deleted: ${target}"
            break
        fi
        waited=$(( $(date +%s) - wait_began ))
        if [ "${waited}" -ge "${wait_limit}" ]; then
            _ci_emit_error "Endpoint deletion timed out for ${target} (state: ${state})" "endpoint_failed" "true"
        fi
        sleep 10
    done

    # Clear endpoint name so fresh deploy proceeds
    ENDPOINT_NAME=""
    return 1
}
|
|
210
|
+
|
|
54
211
|
# Source configuration
|
|
55
212
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
56
213
|
source "${SCRIPT_DIR}/config"
|
|
214
|
+
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
215
|
+
|
|
216
|
+
# ── Profile-resolved variables (env var > profile > default) ──────────────────
|
|
217
|
+
ROLE_ARN="${ROLE_ARN:-${_PROFILE[roleArn]:-}}"
|
|
218
|
+
ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
|
|
219
|
+
export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
|
|
57
220
|
|
|
58
221
|
echo "🚀 Deploying to AWS"
|
|
59
222
|
echo " Project: ${PROJECT_NAME}"
|
|
@@ -150,14 +313,14 @@ source "${SCRIPT_DIR}/lib/inference-component.sh"
|
|
|
150
313
|
|
|
151
314
|
# Validate execution role ARN
|
|
152
315
|
if [ -z "${ROLE_ARN:-}" ]; then
|
|
153
|
-
echo "❌
|
|
316
|
+
echo "❌ ROLE_ARN is not set."
|
|
317
|
+
echo " Run 'ml-container-creator bootstrap' to configure your profile,"
|
|
318
|
+
echo " or set ROLE_ARN as an environment variable."
|
|
154
319
|
echo ""
|
|
155
320
|
echo "Usage:"
|
|
156
321
|
echo " export ROLE_ARN=arn:aws:iam::ACCOUNT_ID:role/YOUR_ROLE"
|
|
157
322
|
echo " ./do/deploy"
|
|
158
323
|
echo ""
|
|
159
|
-
echo "Or set ROLE_ARN in do/config"
|
|
160
|
-
echo ""
|
|
161
324
|
echo "The execution role must have permissions for:"
|
|
162
325
|
echo " • SageMaker endpoint and inference component management"
|
|
163
326
|
echo " • ECR image access"
|
|
@@ -193,6 +356,31 @@ fi
|
|
|
193
356
|
# Resolve container secrets (HF_TOKEN, NGC_API_KEY)
|
|
194
357
|
resolve_secrets
|
|
195
358
|
|
|
359
|
+
<% if (typeof serverEnvVars !== 'undefined' && serverEnvVars && Object.keys(serverEnvVars).length > 0) { %>
|
|
360
|
+
# ============================================================
|
|
361
|
+
# Inject server environment variables into container Environment
|
|
362
|
+
# ============================================================
|
|
363
|
+
<% Object.keys(serverEnvVars).forEach(function(key) { %>
|
|
364
|
+
if [ -n "${<%= key %>:-}" ]; then
|
|
365
|
+
if [ -n "${CONTAINER_ENV_JSON}" ]; then
|
|
366
|
+
CONTAINER_ENV_JSON="${CONTAINER_ENV_JSON},\"<%= key %>\":\"${<%= key %>}\""
|
|
367
|
+
else
|
|
368
|
+
CONTAINER_ENV_JSON="\"<%= key %>\":\"${<%= key %>}\""
|
|
369
|
+
fi
|
|
370
|
+
fi
|
|
371
|
+
<% }); %>
|
|
372
|
+
<% } %>
|
|
373
|
+
|
|
374
|
+
# ============================================================
|
|
375
|
+
# CI-Mode: Idempotent deployment check (runs before normal idempotency)
|
|
376
|
+
# ============================================================
|
|
377
|
+
if [ "${CI_ACTIVE}" = "true" ] && [ "${FORCE_NEW}" != "true" ]; then
|
|
378
|
+
if _ci_handle_existing_endpoint; then
|
|
379
|
+
# Endpoint already InService with matching config — exit successfully
|
|
380
|
+
exit 0
|
|
381
|
+
fi
|
|
382
|
+
fi
|
|
383
|
+
|
|
196
384
|
# ============================================================
|
|
197
385
|
# Idempotency: check for existing deployment from a previous run
|
|
198
386
|
# ============================================================
|
|
@@ -380,16 +568,20 @@ if [ -z "${SKIP_TO}" ]; then
|
|
|
380
568
|
|
|
381
569
|
# Step 2: Create endpoint
|
|
382
570
|
echo "🚀 Creating endpoint: ${ENDPOINT_NAME}"
|
|
383
|
-
if
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
571
|
+
if [ "${CI_ACTIVE}" = "true" ]; then
|
|
572
|
+
_ci_create_endpoint_with_retry "${ENDPOINT_NAME}" "${ENDPOINT_CONFIG_NAME}"
|
|
573
|
+
else
|
|
574
|
+
if ! aws sagemaker create-endpoint \
|
|
575
|
+
--endpoint-name "${ENDPOINT_NAME}" \
|
|
576
|
+
--endpoint-config-name "${ENDPOINT_CONFIG_NAME}" \
|
|
577
|
+
--region "${AWS_REGION}"; then
|
|
578
|
+
|
|
579
|
+
echo "❌ Failed to create endpoint"
|
|
580
|
+
echo " Check that:"
|
|
581
|
+
echo " • Your IAM credentials have sagemaker:CreateEndpoint permission"
|
|
582
|
+
echo " • You have sufficient service quota in region: ${AWS_REGION}"
|
|
583
|
+
exit 4
|
|
584
|
+
fi
|
|
393
585
|
fi
|
|
394
586
|
|
|
395
587
|
echo "✅ Endpoint creation initiated: ${ENDPOINT_NAME}"
|
|
@@ -413,8 +605,18 @@ if [ -z "${SKIP_TO}" ] || [ "${SKIP_TO}" = "wait_endpoint" ]; then
|
|
|
413
605
|
echo " This may take a few minutes..."
|
|
414
606
|
echo " If this times out, re-run ./do/deploy to resume."
|
|
415
607
|
|
|
608
|
+
# CI-mode: check timeout during wait
|
|
609
|
+
if [ "${CI_ACTIVE}" = "true" ]; then
|
|
610
|
+
_ci_check_timeout
|
|
611
|
+
fi
|
|
612
|
+
|
|
416
613
|
wait_endpoint "${ENDPOINT_NAME}"
|
|
417
614
|
|
|
615
|
+
# CI-mode: check timeout after wait completes
|
|
616
|
+
if [ "${CI_ACTIVE}" = "true" ]; then
|
|
617
|
+
_ci_check_timeout
|
|
618
|
+
fi
|
|
619
|
+
|
|
418
620
|
echo "✅ Endpoint is InService: ${ENDPOINT_NAME}"
|
|
419
621
|
fi
|
|
420
622
|
|
|
@@ -165,7 +165,7 @@ create_endpoint_config() {
|
|
|
165
165
|
# Optional: capacity reservation
|
|
166
166
|
if [ -n "${CAPACITY_RESERVATION_ARN:-}" ]; then
|
|
167
167
|
variant_json="${variant_json},\"CapacityReservationConfig\":{\"CapacityReservationPreference\":\"capacity-reservations-only\",\"MlReservationArn\":\"${CAPACITY_RESERVATION_ARN}\"}"
|
|
168
|
-
echo "
|
|
168
|
+
echo " ✓ Capacity reservation: ${CAPACITY_RESERVATION_ARN}"
|
|
169
169
|
fi
|
|
170
170
|
|
|
171
171
|
variant_json="${variant_json}}]"
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# Profile loader — reads active bootstrap profile into _PROFILE[] associative array.
# Source this file after do/config. Values provide defaults; explicit env vars take precedence.
#
# Requires bash 4+ for associative array support.
# macOS ships with bash 3.2 — install bash 4+ via Homebrew: brew install bash
# On shells without associative arrays the profile is NOT loaded (see below),
# so callers fall back to their env vars / built-in defaults.
#
# Expected keys in _PROFILE:
#   awsRegion, accountId, awsProfile, roleArn, ecrRepositoryName,
#   benchmarkS3Bucket, ciBenchmarkResultsBucket, asyncS3Bucket, batchS3Bucket,
#   ciTableName, ciInfraProvisioned

# Temporarily disable unbound variable checking for profile loading
# (keys may not exist in the profile config, and declare -A behavior
# varies across bash versions with set -u)
set +u 2>/dev/null || true

# _mlcc_load_profile [config_path]
# Copies the scalar entries (string/number/bool) of the active profile found in
# the JSON file at config_path (default: ~/.ml-container-creator/config.json)
# into the global _PROFILE array. Silently does nothing when python3 is
# unavailable or the file is missing, unreadable or malformed. Always returns 0.
# Booleans arrive as Python renders them ("True"/"False").
_mlcc_load_profile() {
    # The tilde is expanded by Python (os.path.expanduser), not by the shell.
    local default_path='~/.ml-container-creator/config.json'
    local config_path="${1:-${default_path}}"
    local line
    local key

    command -v python3 >/dev/null 2>&1 || return 0

    _PROFILE_RAW=$(python3 -c '
import json, os, sys
try:
    with open(os.path.expanduser(sys.argv[1])) as f:
        c = json.load(f)
    p = c["profiles"][c["activeProfile"]]
    # Output as KEY=VALUE lines (simple, no JSON parsing in bash)
    for k, v in p.items():
        # One entry per line: anything containing a newline would be parsed
        # as extra keys by the reader below, so it is skipped.
        if isinstance(v, (str, int, float, bool)) and "\n" not in f"{k}{v}":
            print(f"{k}={v}")
except Exception:
    pass
' "${config_path}" 2>/dev/null) || _PROFILE_RAW=""

    [ -n "${_PROFILE_RAW}" ] || return 0

    while IFS= read -r line; do
        case "${line}" in
            *=*) ;;
            *) continue ;;
        esac
        # Split on the FIRST "=" only, so values that contain or end with "="
        # are preserved exactly.
        key="${line%%=*}"
        if [ -n "${key}" ]; then
            _PROFILE["${key}"]="${line#*=}"
        fi
    done <<< "${_PROFILE_RAW}"

    return 0
}

# Only populate the array when it really is associative. On bash < 4
# `declare -A` fails; filling _PROFILE anyway would create an indexed array in
# which every key evaluates to index 0, so a lookup such as
# ${_PROFILE[roleArn]} would return some other key's value.
if declare -A _PROFILE 2>/dev/null; then
    _mlcc_load_profile
fi

# NOTE: set -u is NOT re-enabled here. The caller is responsible for managing
# their own shell options. Re-enabling set -u would cause "unbound variable"
# errors when accessing _PROFILE keys on bash versions where empty associative
# arrays are treated as unset (bash 5.x on some platforms).
|
package/templates/do/optimize
CHANGED
|
@@ -106,6 +106,30 @@ elif [ -n "${INSTANCE_POOLS:-}" ]; then
|
|
|
106
106
|
elif [ -n "${INSTANCE_TYPE:-}" ]; then
|
|
107
107
|
INSTANCE_TYPES="${INSTANCE_TYPE}"
|
|
108
108
|
echo " Instances (from INSTANCE_TYPE): ${INSTANCE_TYPES}"
|
|
109
|
+
elif [ "${ENDPOINT_EXTERNAL:-}" = "true" ] && [ -n "${ENDPOINT_NAME:-}" ]; then
|
|
110
|
+
# External endpoint — query the live endpoint config for instance type
|
|
111
|
+
echo " Discovering instance type from external endpoint: ${ENDPOINT_NAME}"
|
|
112
|
+
ENDPOINT_CONFIG_NAME=$(aws sagemaker describe-endpoint \
|
|
113
|
+
--endpoint-name "${ENDPOINT_NAME}" \
|
|
114
|
+
--region "${AWS_REGION}" \
|
|
115
|
+
--query 'EndpointConfigName' \
|
|
116
|
+
--output text 2>/dev/null) || ENDPOINT_CONFIG_NAME=""
|
|
117
|
+
|
|
118
|
+
if [ -n "${ENDPOINT_CONFIG_NAME}" ]; then
|
|
119
|
+
INSTANCE_TYPES=$(aws sagemaker describe-endpoint-config \
|
|
120
|
+
--endpoint-config-name "${ENDPOINT_CONFIG_NAME}" \
|
|
121
|
+
--region "${AWS_REGION}" \
|
|
122
|
+
--query 'ProductionVariants[0].InstanceType' \
|
|
123
|
+
--output text 2>/dev/null) || INSTANCE_TYPES=""
|
|
124
|
+
fi
|
|
125
|
+
|
|
126
|
+
if [ -n "${INSTANCE_TYPES}" ] && [ "${INSTANCE_TYPES}" != "None" ]; then
|
|
127
|
+
echo " Instances (from endpoint): ${INSTANCE_TYPES}"
|
|
128
|
+
else
|
|
129
|
+
echo "❌ Could not discover instance type from endpoint: ${ENDPOINT_NAME}"
|
|
130
|
+
echo " Provide --instances flag, or set INSTANCE_TYPE in do/config."
|
|
131
|
+
exit 1
|
|
132
|
+
fi
|
|
109
133
|
else
|
|
110
134
|
echo "❌ No instance types available."
|
|
111
135
|
echo " Provide --instances flag, or set INSTANCE_POOLS or INSTANCE_TYPE in do/config."
|
|
@@ -132,9 +156,9 @@ RESUME_EXISTING=false
|
|
|
132
156
|
|
|
133
157
|
if [ "${FORCE}" = false ] && [ -n "${OPTIMIZE_JOB_NAME:-}" ]; then
|
|
134
158
|
EXISTING_STATUS=$(aws sagemaker describe-ai-recommendation-job \
|
|
135
|
-
--job-name "${OPTIMIZE_JOB_NAME}" \
|
|
159
|
+
--ai-recommendation-job-name "${OPTIMIZE_JOB_NAME}" \
|
|
136
160
|
--region "${AWS_REGION}" \
|
|
137
|
-
--query '
|
|
161
|
+
--query 'AIRecommendationJobStatus' \
|
|
138
162
|
--output text 2>/dev/null) || EXISTING_STATUS=""
|
|
139
163
|
|
|
140
164
|
case "${EXISTING_STATUS}" in
|
|
@@ -154,7 +178,7 @@ if [ "${FORCE}" = false ] && [ -n "${OPTIMIZE_JOB_NAME:-}" ]; then
|
|
|
154
178
|
;;
|
|
155
179
|
FAILED|STOPPED)
|
|
156
180
|
FAILURE_REASON=$(aws sagemaker describe-ai-recommendation-job \
|
|
157
|
-
--job-name "${OPTIMIZE_JOB_NAME}" \
|
|
181
|
+
--ai-recommendation-job-name "${OPTIMIZE_JOB_NAME}" \
|
|
158
182
|
--region "${AWS_REGION}" \
|
|
159
183
|
--query 'FailureReason' \
|
|
160
184
|
--output text 2>/dev/null) || FAILURE_REASON="unknown"
|
|
@@ -174,44 +198,86 @@ fi
|
|
|
174
198
|
# ── Create recommendation job ─────────────────────────────────────────────────
|
|
175
199
|
if [ "${RESUME_EXISTING}" = false ]; then
|
|
176
200
|
OPTIMIZE_JOB_NAME="${PROJECT_NAME}-optimize-$(date +%Y%m%d-%H%M%S)"
|
|
201
|
+
WORKLOAD_CONFIG_NAME="${OPTIMIZE_JOB_NAME}-workload"
|
|
177
202
|
|
|
178
203
|
echo "🚀 Creating AI Recommendation Job: ${OPTIMIZE_JOB_NAME}"
|
|
179
204
|
|
|
180
|
-
#
|
|
181
|
-
|
|
182
|
-
|
|
205
|
+
# Step 1: Create workload config
|
|
206
|
+
echo " Creating workload config: ${WORKLOAD_CONFIG_NAME}"
|
|
207
|
+
|
|
208
|
+
WORKLOAD_SPEC_INNER="{\"benchmark\": {\"type\": \"aiperf\"}, \"parameters\": {\"prompt_input_tokens_mean\": ${INPUT_TOKENS}, \"prompt_input_tokens_stddev\": 150, \"output_tokens_mean\": ${OUTPUT_TOKENS}, \"output_tokens_stddev\": 50, \"concurrency\": ${CONCURRENCY}, \"streaming\": true}}"
|
|
209
|
+
WORKLOAD_SPEC_OUTER="{\"WorkloadSpec\": {\"Inline\": $(python3 -c "import json; print(json.dumps('${WORKLOAD_SPEC_INNER}'))")}}"
|
|
210
|
+
|
|
211
|
+
if ! aws sagemaker create-ai-workload-config \
|
|
212
|
+
--ai-workload-config-name "${WORKLOAD_CONFIG_NAME}" \
|
|
213
|
+
--ai-workload-configs "${WORKLOAD_SPEC_OUTER}" \
|
|
214
|
+
--region "${AWS_REGION}" 2>&1 | grep -q "AIWorkloadConfigArn"; then
|
|
215
|
+
echo "❌ Failed to create workload config: ${WORKLOAD_CONFIG_NAME}"
|
|
216
|
+
echo " Check that the execution role has sagemaker:CreateAIWorkloadConfig permission."
|
|
217
|
+
# Show actual error
|
|
218
|
+
aws sagemaker create-ai-workload-config \
|
|
219
|
+
--ai-workload-config-name "${WORKLOAD_CONFIG_NAME}" \
|
|
220
|
+
--ai-workload-configs "${WORKLOAD_SPEC_OUTER}" \
|
|
221
|
+
--region "${AWS_REGION}" 2>&1 || true
|
|
222
|
+
exit 1
|
|
223
|
+
fi
|
|
224
|
+
echo " ✅ Workload config created"
|
|
225
|
+
|
|
226
|
+
# Step 2: Build compute spec (instance types, max 3)
|
|
227
|
+
COMPUTE_SPEC_JSON=""
|
|
183
228
|
IFS=',' read -ra TYPES <<< "${INSTANCE_TYPES}"
|
|
229
|
+
INSTANCE_LIST=""
|
|
230
|
+
COUNT=0
|
|
184
231
|
for itype in "${TYPES[@]}"; do
|
|
185
232
|
itype=$(echo "${itype}" | xargs) # trim whitespace
|
|
186
|
-
if [
|
|
187
|
-
|
|
188
|
-
|
|
233
|
+
if [ ${COUNT} -ge 3 ]; then
|
|
234
|
+
echo " ⚠️ Max 3 instance types supported — truncating"
|
|
235
|
+
break
|
|
236
|
+
fi
|
|
237
|
+
if [ -n "${INSTANCE_LIST}" ]; then
|
|
238
|
+
INSTANCE_LIST="${INSTANCE_LIST},\"${itype}\""
|
|
189
239
|
else
|
|
190
|
-
|
|
240
|
+
INSTANCE_LIST="\"${itype}\""
|
|
191
241
|
fi
|
|
242
|
+
COUNT=$((COUNT + 1))
|
|
192
243
|
done
|
|
193
|
-
|
|
244
|
+
COMPUTE_SPEC_JSON="InstanceTypes=[${INSTANCE_LIST}]"
|
|
245
|
+
|
|
246
|
+
# Step 3: Map goal to performance target metric
|
|
247
|
+
case "${GOAL}" in
|
|
248
|
+
latency) PERF_METRIC="ttft-ms" ;;
|
|
249
|
+
throughput) PERF_METRIC="throughput" ;;
|
|
250
|
+
cost) PERF_METRIC="cost" ;;
|
|
251
|
+
esac
|
|
194
252
|
|
|
195
|
-
#
|
|
196
|
-
# The
|
|
197
|
-
|
|
253
|
+
# Step 4: Determine model source
|
|
254
|
+
# The recommendation API requires model artifacts as s3:// or https:// URI.
|
|
255
|
+
MODEL_SOURCE_ARG=""
|
|
198
256
|
if [[ "${MODEL_NAME}" == s3://* ]]; then
|
|
199
|
-
|
|
257
|
+
MODEL_SOURCE_ARG="S3={S3Uri=${MODEL_NAME}}"
|
|
200
258
|
else
|
|
201
|
-
|
|
259
|
+
# HuggingFace model — use the HTTPS URL for the model on HuggingFace Hub
|
|
260
|
+
MODEL_SOURCE_ARG="S3={S3Uri=https://huggingface.co/${MODEL_NAME}}"
|
|
202
261
|
fi
|
|
203
262
|
|
|
204
|
-
#
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
--
|
|
212
|
-
--
|
|
213
|
-
--
|
|
214
|
-
--
|
|
263
|
+
# Step 5: Create the recommendation job
|
|
264
|
+
S3_OUTPUT="s3://${TUNE_S3_BUCKET:-mlcc-tune-$(aws sts get-caller-identity --query Account --output text 2>/dev/null)-${AWS_REGION}}/${PROJECT_NAME}/optimize/"
|
|
265
|
+
|
|
266
|
+
RECOMMEND_CMD=(
|
|
267
|
+
aws sagemaker create-ai-recommendation-job
|
|
268
|
+
--ai-recommendation-job-name "${OPTIMIZE_JOB_NAME}"
|
|
269
|
+
--model-source "${MODEL_SOURCE_ARG}"
|
|
270
|
+
--output-config "S3OutputLocation=${S3_OUTPUT}"
|
|
271
|
+
--ai-workload-config-identifier "${WORKLOAD_CONFIG_NAME}"
|
|
272
|
+
--performance-target "Constraints=[{Metric=${PERF_METRIC}}]"
|
|
273
|
+
--role-arn "${ROLE_ARN}"
|
|
274
|
+
--compute-spec "${COMPUTE_SPEC_JSON}"
|
|
275
|
+
--inference-specification "Framework=VLLM"
|
|
276
|
+
--region "${AWS_REGION}"
|
|
277
|
+
)
|
|
278
|
+
|
|
279
|
+
if ! "${RECOMMEND_CMD[@]}" 2>&1; then
|
|
280
|
+
echo ""
|
|
215
281
|
echo "❌ Failed to create AI Recommendation Job"
|
|
216
282
|
echo " Check that:"
|
|
217
283
|
echo " • The execution role has sagemaker:CreateAIRecommendationJob permission"
|
|
@@ -232,7 +298,7 @@ fi
|
|
|
232
298
|
POLL_INTERVAL=30
|
|
233
299
|
MAX_POLL_ATTEMPTS=120 # 60 minutes max (120 * 30s)
|
|
234
300
|
|
|
235
|
-
if [ "${JOB_STATUS:-}" != "COMPLETED" ] && [ "${JOB_STATUS:-}" != "FAILED" ] && [ "${JOB_STATUS:-}" != "STOPPED" ]; then
|
|
301
|
+
if [ "${JOB_STATUS:-}" != "COMPLETED" ] && [ "${JOB_STATUS:-}" != "FAILED" ] && [ "${JOB_STATUS:-}" != "STOPPED" ] && [ "${JOB_STATUS:-}" != "Completed" ] && [ "${JOB_STATUS:-}" != "Failed" ] && [ "${JOB_STATUS:-}" != "Stopped" ]; then
|
|
236
302
|
|
|
237
303
|
echo "⏳ Waiting for recommendation job to complete..."
|
|
238
304
|
echo " Polling every ${POLL_INTERVAL}s (max ${MAX_POLL_ATTEMPTS} attempts = 60 min)"
|
|
@@ -243,27 +309,30 @@ JOB_STATUS=""
|
|
|
243
309
|
|
|
244
310
|
while [ ${POLL_COUNT} -lt ${MAX_POLL_ATTEMPTS} ]; do
|
|
245
311
|
JOB_STATUS=$(aws sagemaker describe-ai-recommendation-job \
|
|
246
|
-
--job-name "${OPTIMIZE_JOB_NAME}" \
|
|
312
|
+
--ai-recommendation-job-name "${OPTIMIZE_JOB_NAME}" \
|
|
247
313
|
--region "${AWS_REGION}" \
|
|
248
|
-
--query '
|
|
314
|
+
--query 'AIRecommendationJobStatus' \
|
|
249
315
|
--output text 2>/dev/null) || {
|
|
250
316
|
echo "⚠️ Failed to describe recommendation job (credentials may have expired)"
|
|
251
317
|
echo " Re-run to check status:"
|
|
252
|
-
echo " aws sagemaker describe-ai-recommendation-job --job-name ${OPTIMIZE_JOB_NAME} --region ${AWS_REGION}"
|
|
318
|
+
echo " aws sagemaker describe-ai-recommendation-job --ai-recommendation-job-name ${OPTIMIZE_JOB_NAME} --region ${AWS_REGION}"
|
|
253
319
|
exit 1
|
|
254
320
|
}
|
|
255
321
|
|
|
256
322
|
case "${JOB_STATUS}" in
|
|
257
|
-
COMPLETED)
|
|
323
|
+
COMPLETED|Completed)
|
|
258
324
|
echo "✅ Recommendation job completed!"
|
|
325
|
+
JOB_STATUS="COMPLETED"
|
|
259
326
|
break
|
|
260
327
|
;;
|
|
261
|
-
FAILED)
|
|
328
|
+
FAILED|Failed)
|
|
262
329
|
echo "❌ Recommendation job failed"
|
|
330
|
+
JOB_STATUS="FAILED"
|
|
263
331
|
break
|
|
264
332
|
;;
|
|
265
|
-
STOPPED)
|
|
333
|
+
STOPPED|Stopped)
|
|
266
334
|
echo "⚠️ Recommendation job was stopped"
|
|
335
|
+
JOB_STATUS="STOPPED"
|
|
267
336
|
break
|
|
268
337
|
;;
|
|
269
338
|
*)
|
|
@@ -292,7 +361,7 @@ if [ "${JOB_STATUS}" = "COMPLETED" ]; then
|
|
|
292
361
|
|
|
293
362
|
# Get the full job description with results
|
|
294
363
|
JOB_DESCRIPTION=$(aws sagemaker describe-ai-recommendation-job \
|
|
295
|
-
--job-name "${OPTIMIZE_JOB_NAME}" \
|
|
364
|
+
--ai-recommendation-job-name "${OPTIMIZE_JOB_NAME}" \
|
|
296
365
|
--region "${AWS_REGION}" \
|
|
297
366
|
--output json 2>/dev/null) || {
|
|
298
367
|
echo "❌ Failed to fetch recommendation results"
|
|
@@ -503,7 +572,7 @@ except:
|
|
|
503
572
|
|
|
504
573
|
if [ "${FAILURE_REASON}" = "unknown" ]; then
|
|
505
574
|
FAILURE_REASON=$(aws sagemaker describe-ai-recommendation-job \
|
|
506
|
-
--job-name "${OPTIMIZE_JOB_NAME}" \
|
|
575
|
+
--ai-recommendation-job-name "${OPTIMIZE_JOB_NAME}" \
|
|
507
576
|
--region "${AWS_REGION}" \
|
|
508
577
|
--query 'FailureReason' \
|
|
509
578
|
--output text 2>/dev/null) || FAILURE_REASON="unknown"
|
|
@@ -513,7 +582,7 @@ except:
|
|
|
513
582
|
echo " Reason: ${FAILURE_REASON}"
|
|
514
583
|
echo ""
|
|
515
584
|
echo " Debug:"
|
|
516
|
-
echo " aws sagemaker describe-ai-recommendation-job --job-name ${OPTIMIZE_JOB_NAME} --region ${AWS_REGION}"
|
|
585
|
+
echo " aws sagemaker describe-ai-recommendation-job --ai-recommendation-job-name ${OPTIMIZE_JOB_NAME} --region ${AWS_REGION}"
|
|
517
586
|
|
|
518
587
|
elif [ "${JOB_STATUS}" = "STOPPED" ]; then
|
|
519
588
|
echo "⚠️ Recommendation job was stopped before completion"
|
package/templates/do/push
CHANGED
|
@@ -9,6 +9,11 @@ set -o pipefail
|
|
|
9
9
|
# Source configuration
|
|
10
10
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
11
11
|
source "${SCRIPT_DIR}/config"
|
|
12
|
+
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
13
|
+
|
|
14
|
+
# ── Profile-resolved variables (env var > profile > default) ──────────────────
|
|
15
|
+
ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
|
|
16
|
+
export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
|
|
12
17
|
|
|
13
18
|
echo "🚀 Pushing Docker image to Amazon ECR"
|
|
14
19
|
echo " Project: ${PROJECT_NAME}"
|