@aws/ml-container-creator 0.8.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE-THIRD-PARTY +50760 -16218
- package/bin/cli.js +31 -137
- package/package.json +7 -2
- package/servers/lib/catalogs/instances.json +52 -1275
- package/servers/lib/catalogs/models.json +0 -132
- package/servers/lib/catalogs/popular-diffusors.json +1 -110
- package/src/app.js +29 -2
- package/src/lib/config-manager.js +17 -0
- package/src/lib/generated/cli-options.js +467 -0
- package/src/lib/generated/validation-rules.js +202 -0
- package/src/lib/mcp-client.js +16 -1
- package/src/lib/mcp-command-handler.js +10 -2
- package/src/lib/prompt-runner.js +16 -2
- package/src/lib/train-config-parser.js +136 -0
- package/src/lib/train-config-persistence.js +143 -0
- package/src/lib/train-config-validator.js +112 -0
- package/src/lib/train-feedback.js +46 -0
- package/src/lib/train-idempotency.js +97 -0
- package/src/lib/train-request-builder.js +120 -0
- package/templates/code/serve +5 -134
- package/templates/code/serve.d/lmi.ejs +19 -0
- package/templates/code/serve.d/sglang.ejs +47 -0
- package/templates/code/serve.d/tensorrt-llm.ejs +53 -0
- package/templates/code/serve.d/vllm.ejs +48 -0
- package/templates/do/.train_build_request.py +141 -0
- package/templates/do/.train_poll_parser.py +135 -0
- package/templates/do/.train_status_parser.py +187 -0
- package/templates/do/clean +1 -1387
- package/templates/do/clean.d/async-inference.ejs +508 -0
- package/templates/do/clean.d/batch-transform.ejs +512 -0
- package/templates/do/clean.d/hyperpod-eks.ejs +481 -0
- package/templates/do/clean.d/managed-inference.ejs +1043 -0
- package/templates/do/deploy +1 -1766
- package/templates/do/deploy.d/async-inference.ejs +501 -0
- package/templates/do/deploy.d/batch-transform.ejs +529 -0
- package/templates/do/deploy.d/hyperpod-eks.ejs +339 -0
- package/templates/do/deploy.d/managed-inference.ejs +726 -0
- package/templates/do/lib/feedback.sh +41 -0
- package/templates/do/train +786 -0
- package/templates/do/training/config.yaml +140 -0
- package/templates/do/training/train.py +463 -0
|
@@ -0,0 +1,501 @@
|
|
|
1
|
+
#!/bin/bash
|
|
2
|
+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
# Strict mode: abort on command failure, on use of an unset variable,
# and when any stage of a pipeline fails.
set -e
set -u
set -o pipefail

# Parse flags
#   --force      ignore any previously recorded endpoint and create new resources
#   --force-ic   accepted so the command line stays compatible, but this async
#                flow never reads FORCE_IC / IC_TARGET (async inference has no
#                inference components), so the flag is reported and ignored
#   --help | -h  print usage and exit 0
# Any other argument prints an error and exits 1.
FORCE_NEW=false
FORCE_IC=false
IC_TARGET=""
while [ $# -gt 0 ]; do
  case "$1" in
    --force)
      FORCE_NEW=true
      shift
      ;;
    --force-ic)
      FORCE_IC=true
      # Tell the user instead of silently doing nothing.
      echo "⚠️ --force-ic has no effect for async inference (no inference components); ignoring." >&2
      shift
      ;;
    --help|-h)
      echo "Usage: ./do/deploy [--force] [--force-ic]"
      echo ""
      echo "Options:"
      echo " --force Create a new endpoint, even if one already exists."
      echo " --force-ic Accepted for compatibility; ignored (async endpoints have no inference components)."
      echo ""
      echo "Without flags, deploy resumes from the last run."
      exit 0
      ;;
    *)
      echo "❌ Unknown option: $1"
      echo " Run ./do/deploy --help for usage."
      exit 1
      ;;
  esac
done
|
|
37
|
+
|
|
38
|
+
# Source configuration
|
|
39
|
+
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
40
|
+
source "${SCRIPT_DIR}/config"
|
|
41
|
+
|
|
42
|
+
echo "🚀 Deploying to AWS"
|
|
43
|
+
echo " Project: ${PROJECT_NAME}"
|
|
44
|
+
echo " Deployment config: ${DEPLOYMENT_CONFIG}"
|
|
45
|
+
echo " Region: ${AWS_REGION}"
|
|
46
|
+
echo " Build target: ${BUILD_TARGET}"
|
|
47
|
+
echo " Deployment target: ${DEPLOYMENT_TARGET}"
|
|
48
|
+
echo " Instance type: ${INSTANCE_TYPE}"
|
|
49
|
+
echo " S3 output: ${ASYNC_S3_OUTPUT_PATH}"
|
|
50
|
+
echo " SNS success: ${ASYNC_SNS_SUCCESS_TOPIC}"
|
|
51
|
+
echo " SNS error: ${ASYNC_SNS_ERROR_TOPIC}"
|
|
52
|
+
<% if (asyncMaxConcurrentInvocations) { %>
|
|
53
|
+
echo " Max concurrent: ${ASYNC_MAX_CONCURRENT_INVOCATIONS}"
|
|
54
|
+
<% } %>
|
|
55
|
+
|
|
56
|
+
# Check AWS credentials
# One get-caller-identity call both proves the credentials work and yields the
# account ID, so a failure always goes through the guidance below (exit 4)
# rather than aborting on a second, unguarded call.
echo "🔍 Validating AWS credentials..."
if ! AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text 2> /dev/null); then
  echo "❌ AWS credentials not configured"
  echo " Run: aws configure"
  echo " Or set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables"
  exit 4
fi

echo "✅ AWS credentials validated (Account: ${AWS_ACCOUNT_ID})"

# Construct ECR repository URL
ECR_REPOSITORY="${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/${ECR_REPOSITORY_NAME}"
|
|
70
|
+
|
|
71
|
+
# ============================================================
# Shared: Verify ECR image exists
# ============================================================
# The image to deploy is always the "<project>-latest" tag; stop with exit 4
# and build instructions when that tag cannot be described in the repository.
echo "🔍 Verifying ECR image exists..."
IMAGE_TAG="${PROJECT_NAME}-latest"
_ecr_image_uri="${ECR_REPOSITORY}:${IMAGE_TAG}"

if aws ecr describe-images \
  --repository-name "${ECR_REPOSITORY_NAME}" \
  --image-ids imageTag="${IMAGE_TAG}" \
  --region "${AWS_REGION}" > /dev/null 2>&1; then
  echo "✅ ECR image found: ${_ecr_image_uri}"
else
  echo "❌ ECR image not found: ${_ecr_image_uri}"
  echo ""
  echo "Please build and push your image first:"
  echo " ./do/submit"
  echo ""
  echo "After the build completes successfully, run this deploy script again."
  exit 4
fi
|
|
91
|
+
|
|
92
|
+
# ============================================================
# Shared: Resolve secrets for container environment
# ============================================================
# CONTAINER_ENV_JSON accumulates comma-separated "NAME":"value" members
# (without the surrounding braces). It stays empty when no secret is set.
CONTAINER_ENV_JSON=""

#######################################
# Append one "NAME":"value" member to CONTAINER_ENV_JSON.
# Backslashes and double quotes in the value are escaped so that a secret
# containing them cannot break or alter the JSON document built from it.
# Other control characters are not escaped.
# Globals:   CONTAINER_ENV_JSON (read, written)
# Arguments: $1 - environment variable name
#            $2 - raw value
#######################################
_append_container_env() {
  local name=$1
  local value
  value=${2//\\/\\\\}
  value=${value//\"/\\\"}
  # ${VAR:+...} emits the separating comma only when something is already there.
  CONTAINER_ENV_JSON="${CONTAINER_ENV_JSON:+${CONTAINER_ENV_JSON},}\"${name}\":\"${value}\""
}

# HuggingFace token: a Secrets Manager ARN takes precedence over a literal HF_TOKEN.
if [ -n "${HF_TOKEN_ARN:-}" ]; then
  echo "🔐 Resolving HuggingFace token from Secrets Manager..."
  RESOLVED_HF_TOKEN=$(aws secretsmanager get-secret-value --secret-id "${HF_TOKEN_ARN}" --query SecretString --output text --region "${AWS_REGION}") || {
    echo "❌ Failed to resolve HuggingFace token from Secrets Manager"
    exit 3
  }
  _append_container_env "HF_TOKEN" "${RESOLVED_HF_TOKEN}"
elif [ -n "${HF_TOKEN:-}" ]; then
  _append_container_env "HF_TOKEN" "${HF_TOKEN}"
fi

# NGC API key: same precedence rule as above.
if [ -n "${NGC_API_KEY_ARN:-}" ]; then
  echo "🔐 Resolving NGC API key from Secrets Manager..."
  RESOLVED_NGC_KEY=$(aws secretsmanager get-secret-value --secret-id "${NGC_API_KEY_ARN}" --query SecretString --output text --region "${AWS_REGION}") || {
    echo "❌ Failed to resolve NGC API key from Secrets Manager"
    exit 3
  }
  _append_container_env "NGC_API_KEY" "${RESOLVED_NGC_KEY}"
elif [ -n "${NGC_API_KEY:-}" ]; then
  _append_container_env "NGC_API_KEY" "${NGC_API_KEY}"
fi
|
|
126
|
+
|
|
127
|
+
# ============================================================
# SageMaker Async Inference Deployment (Model-Based)
# SageMaker async inference does NOT support Inference Components.
# Flow: create-model → create-endpoint-config (with AsyncInferenceConfig) → create-endpoint
# ============================================================

# Pull in the shared helper libraries that live beside this script.
. "${SCRIPT_DIR}/lib/secrets.sh"
. "${SCRIPT_DIR}/lib/wait.sh"

# Resolve container secrets (HF_TOKEN, NGC_API_KEY)
resolve_secrets

# The execution role is mandatory: without it, explain how to provide one and
# exit with status 3.
[ -n "${ROLE_ARN:-}" ] || {
  printf '%s\n' "❌ Execution role ARN not provided"
  printf '%s\n' ""
  printf '%s\n' "Usage:"
  printf '%s\n' " export ROLE_ARN=arn:aws:iam::ACCOUNT_ID:role/YOUR_ROLE"
  printf '%s\n' " ./do/deploy"
  printf '%s\n' ""
  printf '%s\n' "Or set ROLE_ARN in do/config"
  printf '%s\n' ""
  printf '%s\n' "The execution role must have permissions for:"
  printf '%s\n' " • SageMaker model and endpoint management"
  printf '%s\n' " • ECR image access"
  printf '%s\n' " • S3 write access for async output path: ${ASYNC_S3_OUTPUT_PATH}"
  printf '%s\n' " • SNS publish permissions (optional, for notifications)"
  printf '%s\n' " • CloudWatch Logs"
  exit 3
}

printf '%s\n' " Using execution role: ${ROLE_ARN}"
|
|
160
|
+
|
|
161
|
+
# ============================================================
|
|
162
|
+
# Bootstrap async infrastructure (S3 bucket + SNS topics)
|
|
163
|
+
# ============================================================
|
|
164
|
+
|
|
165
|
+
# Extract bucket name from S3 output path
|
|
166
|
+
ASYNC_S3_BUCKET=$(echo "${ASYNC_S3_OUTPUT_PATH}" | sed 's|s3://||' | cut -d'/' -f1)
|
|
167
|
+
|
|
168
|
+
<% if (!asyncS3OutputPath) { %>
|
|
169
|
+
# Bootstrap default S3 bucket (check-and-create)
|
|
170
|
+
echo "🔍 Checking if S3 bucket exists: ${ASYNC_S3_BUCKET}"
|
|
171
|
+
if ! aws s3api head-bucket --bucket "${ASYNC_S3_BUCKET}" --region "${AWS_REGION}" 2>/dev/null; then
|
|
172
|
+
echo "📦 Creating S3 bucket: ${ASYNC_S3_BUCKET}"
|
|
173
|
+
if [ "${AWS_REGION}" = "us-east-1" ]; then
|
|
174
|
+
if ! aws s3api create-bucket \
|
|
175
|
+
--bucket "${ASYNC_S3_BUCKET}" \
|
|
176
|
+
--region "${AWS_REGION}"; then
|
|
177
|
+
echo "❌ Failed to create S3 bucket: ${ASYNC_S3_BUCKET}"
|
|
178
|
+
echo ""
|
|
179
|
+
echo " Check that:"
|
|
180
|
+
echo " • Your IAM credentials have s3:CreateBucket permission"
|
|
181
|
+
echo " • The bucket name is not already taken globally"
|
|
182
|
+
exit 4
|
|
183
|
+
fi
|
|
184
|
+
else
|
|
185
|
+
if ! aws s3api create-bucket \
|
|
186
|
+
--bucket "${ASYNC_S3_BUCKET}" \
|
|
187
|
+
--region "${AWS_REGION}" \
|
|
188
|
+
--create-bucket-configuration LocationConstraint="${AWS_REGION}"; then
|
|
189
|
+
echo "❌ Failed to create S3 bucket: ${ASYNC_S3_BUCKET}"
|
|
190
|
+
echo ""
|
|
191
|
+
echo " Check that:"
|
|
192
|
+
echo " • Your IAM credentials have s3:CreateBucket permission"
|
|
193
|
+
echo " • The bucket name is not already taken globally"
|
|
194
|
+
exit 4
|
|
195
|
+
fi
|
|
196
|
+
fi
|
|
197
|
+
echo "✅ S3 bucket created: ${ASYNC_S3_BUCKET}"
|
|
198
|
+
else
|
|
199
|
+
echo "✅ S3 bucket exists: ${ASYNC_S3_BUCKET}"
|
|
200
|
+
fi
|
|
201
|
+
<% } else { %>
|
|
202
|
+
# Custom S3 output path provided — skip bucket creation
|
|
203
|
+
echo "✅ Using custom S3 output path: ${ASYNC_S3_OUTPUT_PATH}"
|
|
204
|
+
<% } %>
|
|
205
|
+
|
|
206
|
+
# Extract topic name from SNS success topic ARN
|
|
207
|
+
ASYNC_SNS_SUCCESS_TOPIC_NAME=$(echo "${ASYNC_SNS_SUCCESS_TOPIC}" | awk -F: '{print $NF}')
|
|
208
|
+
|
|
209
|
+
<% if (!asyncSnsSuccessTopic) { %>
|
|
210
|
+
# Bootstrap default SNS success topic (check-and-create)
|
|
211
|
+
echo "🔍 Checking if SNS success topic exists: ${ASYNC_SNS_SUCCESS_TOPIC_NAME}"
|
|
212
|
+
if ! aws sns get-topic-attributes --topic-arn "${ASYNC_SNS_SUCCESS_TOPIC}" --region "${AWS_REGION}" 2>/dev/null; then
|
|
213
|
+
echo "📦 Creating SNS success topic: ${ASYNC_SNS_SUCCESS_TOPIC_NAME}"
|
|
214
|
+
if ! aws sns create-topic \
|
|
215
|
+
--name "${ASYNC_SNS_SUCCESS_TOPIC_NAME}" \
|
|
216
|
+
--region "${AWS_REGION}" > /dev/null; then
|
|
217
|
+
echo "❌ Failed to create SNS success topic: ${ASYNC_SNS_SUCCESS_TOPIC_NAME}"
|
|
218
|
+
echo ""
|
|
219
|
+
echo " Check that:"
|
|
220
|
+
echo " • Your IAM credentials have sns:CreateTopic permission"
|
|
221
|
+
exit 4
|
|
222
|
+
fi
|
|
223
|
+
echo "✅ SNS success topic created: ${ASYNC_SNS_SUCCESS_TOPIC_NAME}"
|
|
224
|
+
else
|
|
225
|
+
echo "✅ SNS success topic exists: ${ASYNC_SNS_SUCCESS_TOPIC_NAME}"
|
|
226
|
+
fi
|
|
227
|
+
|
|
228
|
+
# Record SNS success topic in manifest (non-blocking)
|
|
229
|
+
./do/manifest add \
|
|
230
|
+
--type sns-topic \
|
|
231
|
+
--id "${ASYNC_SNS_SUCCESS_TOPIC}" \
|
|
232
|
+
--project "${PROJECT_NAME}" \
|
|
233
|
+
--meta "{\"topicName\":\"${ASYNC_SNS_SUCCESS_TOPIC_NAME}\",\"purpose\":\"async-success\",\"region\":\"${AWS_REGION}\"}" \
|
|
234
|
+
2>/dev/null || true
|
|
235
|
+
|
|
236
|
+
<% } else { %>
|
|
237
|
+
# Custom SNS success topic ARN provided — skip topic creation
|
|
238
|
+
echo "✅ Using custom SNS success topic: ${ASYNC_SNS_SUCCESS_TOPIC}"
|
|
239
|
+
|
|
240
|
+
# Record SNS success topic in manifest (non-blocking)
|
|
241
|
+
ASYNC_SNS_SUCCESS_TOPIC_NAME=$(echo "${ASYNC_SNS_SUCCESS_TOPIC}" | awk -F: '{print $NF}')
|
|
242
|
+
./do/manifest add \
|
|
243
|
+
--type sns-topic \
|
|
244
|
+
--id "${ASYNC_SNS_SUCCESS_TOPIC}" \
|
|
245
|
+
--project "${PROJECT_NAME}" \
|
|
246
|
+
--meta "{\"topicName\":\"${ASYNC_SNS_SUCCESS_TOPIC_NAME}\",\"purpose\":\"async-success\",\"region\":\"${AWS_REGION}\"}" \
|
|
247
|
+
2>/dev/null || true
|
|
248
|
+
|
|
249
|
+
<% } %>
|
|
250
|
+
|
|
251
|
+
# Extract topic name from SNS error topic ARN
|
|
252
|
+
ASYNC_SNS_ERROR_TOPIC_NAME=$(echo "${ASYNC_SNS_ERROR_TOPIC}" | awk -F: '{print $NF}')
|
|
253
|
+
|
|
254
|
+
<% if (!asyncSnsErrorTopic) { %>
|
|
255
|
+
# Bootstrap default SNS error topic (check-and-create)
|
|
256
|
+
echo "🔍 Checking if SNS error topic exists: ${ASYNC_SNS_ERROR_TOPIC_NAME}"
|
|
257
|
+
if ! aws sns get-topic-attributes --topic-arn "${ASYNC_SNS_ERROR_TOPIC}" --region "${AWS_REGION}" 2>/dev/null; then
|
|
258
|
+
echo "📦 Creating SNS error topic: ${ASYNC_SNS_ERROR_TOPIC_NAME}"
|
|
259
|
+
if ! aws sns create-topic \
|
|
260
|
+
--name "${ASYNC_SNS_ERROR_TOPIC_NAME}" \
|
|
261
|
+
--region "${AWS_REGION}" > /dev/null; then
|
|
262
|
+
echo "❌ Failed to create SNS error topic: ${ASYNC_SNS_ERROR_TOPIC_NAME}"
|
|
263
|
+
echo ""
|
|
264
|
+
echo " Check that:"
|
|
265
|
+
echo " • Your IAM credentials have sns:CreateTopic permission"
|
|
266
|
+
exit 4
|
|
267
|
+
fi
|
|
268
|
+
echo "✅ SNS error topic created: ${ASYNC_SNS_ERROR_TOPIC_NAME}"
|
|
269
|
+
else
|
|
270
|
+
echo "✅ SNS error topic exists: ${ASYNC_SNS_ERROR_TOPIC_NAME}"
|
|
271
|
+
fi
|
|
272
|
+
|
|
273
|
+
# Record SNS error topic in manifest (non-blocking)
|
|
274
|
+
./do/manifest add \
|
|
275
|
+
--type sns-topic \
|
|
276
|
+
--id "${ASYNC_SNS_ERROR_TOPIC}" \
|
|
277
|
+
--project "${PROJECT_NAME}" \
|
|
278
|
+
--meta "{\"topicName\":\"${ASYNC_SNS_ERROR_TOPIC_NAME}\",\"purpose\":\"async-error\",\"region\":\"${AWS_REGION}\"}" \
|
|
279
|
+
2>/dev/null || true
|
|
280
|
+
|
|
281
|
+
<% } else { %>
|
|
282
|
+
# Custom SNS error topic ARN provided — skip topic creation
|
|
283
|
+
echo "✅ Using custom SNS error topic: ${ASYNC_SNS_ERROR_TOPIC}"
|
|
284
|
+
|
|
285
|
+
# Record SNS error topic in manifest (non-blocking)
|
|
286
|
+
ASYNC_SNS_ERROR_TOPIC_NAME=$(echo "${ASYNC_SNS_ERROR_TOPIC}" | awk -F: '{print $NF}')
|
|
287
|
+
./do/manifest add \
|
|
288
|
+
--type sns-topic \
|
|
289
|
+
--id "${ASYNC_SNS_ERROR_TOPIC}" \
|
|
290
|
+
--project "${PROJECT_NAME}" \
|
|
291
|
+
--meta "{\"topicName\":\"${ASYNC_SNS_ERROR_TOPIC_NAME}\",\"purpose\":\"async-error\",\"region\":\"${AWS_REGION}\"}" \
|
|
292
|
+
2>/dev/null || true
|
|
293
|
+
|
|
294
|
+
<% } %>
|
|
295
|
+
|
|
296
|
+
# ============================================================
# Create async endpoint (classic model-based flow)
# SageMaker async inference does NOT support Inference Components.
# Flow: create-model → create-endpoint-config (with AsyncInferenceConfig) → create-endpoint
# ============================================================

# ============================================================
# Idempotency: check for existing deployment from a previous run
# ============================================================
# SKIP_TO stays empty when new resources must be created and becomes
# "wait_endpoint" when an endpoint from an earlier run is still being provisioned.
SKIP_TO=""

if [ "${FORCE_NEW}" = true ]; then
  printf '%s\n' "🔄 --force: ignoring previous deployment, creating new resources."
elif [ -n "${ENDPOINT_NAME:-}" ]; then
  printf '%s\n' "🔍 Checking for existing deployment: ${ENDPOINT_NAME}"

  # _get_endpoint_status is not defined in this file (presumably lib/wait.sh —
  # confirm); an empty result is treated below as "endpoint not found".
  EP_STATUS=$(_get_endpoint_status "${ENDPOINT_NAME}")

  if [ "${EP_STATUS}" = "InService" ]; then
    # Already live: report and stop successfully without touching anything.
    printf '%s\n' "✅ Async endpoint already InService: ${ENDPOINT_NAME}"
    printf '%s\n' ""
    printf '%s\n' "📋 Deployment is already live. Nothing to do."
    printf '%s\n' " Endpoint: ${ENDPOINT_NAME}"
    printf '%s\n' ""
    printf '%s\n' "🧪 Test your async endpoint:"
    printf '%s\n' " ./do/test"
    printf '%s\n' ""
    printf '%s\n' "🧹 Clean up when done:"
    printf '%s\n' " ./do/clean endpoint"
    exit 0
  elif [ "${EP_STATUS}" = "Creating" ] || [ "${EP_STATUS}" = "Updating" ]; then
    # Still provisioning: skip creation and go straight to the wait step.
    printf '%s\n' "⏳ Endpoint still ${EP_STATUS}: ${ENDPOINT_NAME}"
    SKIP_TO="wait_endpoint"
  elif [ "${EP_STATUS}" = "Failed" ]; then
    printf '%s\n' "⚠️ Previous endpoint failed: ${ENDPOINT_NAME}"
    printf '%s\n' " Creating a new deployment. Clean up the failed endpoint with:"
    printf '%s\n' " ./do/clean endpoint"
    printf '%s\n' ""
  elif [ -z "${EP_STATUS}" ]; then
    printf '%s\n' " Previous endpoint not found (may have been cleaned up). Creating new deployment."
  else
    printf '%s\n' " Endpoint in unexpected state: ${EP_STATUS}. Creating new deployment."
  fi
fi
|
|
346
|
+
|
|
347
|
+
# ============================================================
# Create async resources (skip if resuming from wait)
# ============================================================
# Runs only when SKIP_TO is empty. Creates, in order: SageMaker model,
# endpoint configuration (with AsyncInferenceConfig), endpoint. Every AWS
# failure prints hints and exits with status 4. Manifest recording is
# best-effort; the manifest tool is addressed through SCRIPT_DIR (like config
# and lib/) so records are not silently lost when deploy is started from a
# directory other than the project root.
if [ -z "${SKIP_TO}" ]; then
  # All three resource names share one timestamp suffix.
  TIMESTAMP=$(date +%s)
  MODEL_NAME_SM="${PROJECT_NAME}-async-model-${TIMESTAMP}"
  ENDPOINT_CONFIG_NAME="${PROJECT_NAME}-async-epc-${TIMESTAMP}"
  ENDPOINT_NAME="${PROJECT_NAME}-async-ep-${TIMESTAMP}"

  # Persist the names before creating anything.
  # NOTE(review): _update_config_var is defined outside this file; presumably it
  # writes to do/config so a later run can resume — confirm.
  _update_config_var "ENDPOINT_NAME" "${ENDPOINT_NAME}"
  _update_config_var "ENDPOINT_CONFIG_NAME" "${ENDPOINT_CONFIG_NAME}"
  _update_config_var "SAGEMAKER_MODEL_NAME" "${MODEL_NAME_SM}"

  # Step 1: Create SageMaker model
  # Build primary container spec; Environment is added only when secrets were resolved.
  PRIMARY_CONTAINER="{\"Image\":\"${ECR_REPOSITORY}:${IMAGE_TAG}\""
  if [ -n "${CONTAINER_ENV_JSON}" ]; then
    PRIMARY_CONTAINER="${PRIMARY_CONTAINER},\"Environment\":{${CONTAINER_ENV_JSON}}"
  fi
  PRIMARY_CONTAINER="${PRIMARY_CONTAINER}}"

  echo "📦 Creating SageMaker model: ${MODEL_NAME_SM}"
  if ! aws sagemaker create-model \
    --model-name "${MODEL_NAME_SM}" \
    --primary-container "${PRIMARY_CONTAINER}" \
    --execution-role-arn "${ROLE_ARN}" \
    --region "${AWS_REGION}"; then

    echo "❌ Failed to create SageMaker model"
    echo " Check that:"
    echo " • The execution role ARN is valid"
    echo " • The ECR image exists and is accessible"
    echo " • The IAM role has ecr:GetDownloadUrlForLayer permission"
    exit 4
  fi

  echo "✅ SageMaker model created: ${MODEL_NAME_SM}"

  # Record model in manifest (non-blocking)
  MODEL_ARN="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:model/${MODEL_NAME_SM}"
  "${SCRIPT_DIR}/manifest" add \
    --type sagemaker-model \
    --id "${MODEL_ARN}" \
    --project "${PROJECT_NAME}" \
    --meta "{\"modelName\":\"${MODEL_NAME_SM}\",\"region\":\"${AWS_REGION}\"}" \
    2>/dev/null || true

  # Build production variant JSON (classic: includes ModelName, no execution-role-arn on endpoint config)
  VARIANT_JSON="[{\"VariantName\":\"AllTraffic\",\"ModelName\":\"${MODEL_NAME_SM}\",\"InstanceType\":\"${INSTANCE_TYPE}\",\"InitialInstanceCount\":1"

  # Append InferenceAmiVersion if configured
  if [ -n "${INFERENCE_AMI_VERSION:-}" ]; then
    VARIANT_JSON="${VARIANT_JSON},\"InferenceAmiVersion\":\"${INFERENCE_AMI_VERSION}\""
    echo " AMI version: ${INFERENCE_AMI_VERSION}"
  fi

  VARIANT_JSON="${VARIANT_JSON}}]"

  # Build AsyncInferenceConfig JSON; ClientConfig is added only when a
  # concurrency limit is configured (inserted unquoted, i.e. as a JSON number).
  ASYNC_CONFIG="{\"OutputConfig\":{\"S3OutputPath\":\"${ASYNC_S3_OUTPUT_PATH}\",\"NotificationConfig\":{\"SuccessTopic\":\"${ASYNC_SNS_SUCCESS_TOPIC}\",\"ErrorTopic\":\"${ASYNC_SNS_ERROR_TOPIC}\"}}"

  if [ -n "${ASYNC_MAX_CONCURRENT_INVOCATIONS:-}" ]; then
    ASYNC_CONFIG="${ASYNC_CONFIG},\"ClientConfig\":{\"MaxConcurrentInvocationsPerInstance\":${ASYNC_MAX_CONCURRENT_INVOCATIONS}}"
  fi

  ASYNC_CONFIG="${ASYNC_CONFIG}}"

  # Step 2: Create endpoint configuration with AsyncInferenceConfig (no --execution-role-arn)
  echo "⚙️ Creating async endpoint configuration: ${ENDPOINT_CONFIG_NAME}"
  if ! aws sagemaker create-endpoint-config \
    --endpoint-config-name "${ENDPOINT_CONFIG_NAME}" \
    --production-variants "${VARIANT_JSON}" \
    --async-inference-config "${ASYNC_CONFIG}" \
    --region "${AWS_REGION}"; then

    echo "❌ Failed to create async endpoint configuration"
    echo " Check that:"
    echo " • The S3 output path is accessible: ${ASYNC_S3_OUTPUT_PATH}"
    echo " • The IAM role has s3:PutObject permission on the output path"
    echo " • The instance type is valid: ${INSTANCE_TYPE}"
    echo " • The instance type is available in region: ${AWS_REGION}"
    echo " • You have sufficient service quota for the instance type"
    exit 4
  fi

  echo "✅ Async endpoint configuration created: ${ENDPOINT_CONFIG_NAME}"

  # Record endpoint config in manifest (non-blocking)
  ENDPOINT_CONFIG_ARN="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:endpoint-config/${ENDPOINT_CONFIG_NAME}"
  "${SCRIPT_DIR}/manifest" add \
    --type sagemaker-endpoint-config \
    --id "${ENDPOINT_CONFIG_ARN}" \
    --project "${PROJECT_NAME}" \
    --meta "{\"endpointConfigName\":\"${ENDPOINT_CONFIG_NAME}\",\"instanceType\":\"${INSTANCE_TYPE}\",\"region\":\"${AWS_REGION}\"}" \
    2>/dev/null || true

  # Step 3: Create endpoint
  echo "🚀 Creating async endpoint: ${ENDPOINT_NAME}"
  if ! aws sagemaker create-endpoint \
    --endpoint-name "${ENDPOINT_NAME}" \
    --endpoint-config-name "${ENDPOINT_CONFIG_NAME}" \
    --region "${AWS_REGION}"; then

    echo "❌ Failed to create async endpoint"
    echo " Check that:"
    echo " • Your IAM credentials have sagemaker:CreateEndpoint permission"
    echo " • You have sufficient service quota in region: ${AWS_REGION}"
    exit 4
  fi

  echo "✅ Async endpoint creation initiated: ${ENDPOINT_NAME}"

  # Record endpoint in manifest (non-blocking)
  ENDPOINT_ARN="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:endpoint/${ENDPOINT_NAME}"
  "${SCRIPT_DIR}/manifest" add \
    --type sagemaker-endpoint \
    --id "${ENDPOINT_ARN}" \
    --project "${PROJECT_NAME}" \
    --meta "{\"endpointName\":\"${ENDPOINT_NAME}\",\"instanceType\":\"${INSTANCE_TYPE}\",\"region\":\"${AWS_REGION}\"}" \
    2>/dev/null || true
fi
|
|
468
|
+
|
|
469
|
+
# ============================================================
# Wait for endpoint (skip if already InService)
# ============================================================
# Wait both after a fresh creation (SKIP_TO empty) and when resuming an
# endpoint that is still being provisioned (SKIP_TO=wait_endpoint).
case "${SKIP_TO}" in
  ""|wait_endpoint)
    printf '%s\n' "⏳ Waiting for async endpoint to reach InService status..."
    printf '%s\n' " This may take several minutes..."
    printf '%s\n' " If this times out, re-run ./do/deploy to resume."

    wait_endpoint "${ENDPOINT_NAME}"
    ;;
esac
|
|
479
|
+
|
|
480
|
+
echo "✅ Async deployment complete!"
|
|
481
|
+
echo ""
|
|
482
|
+
echo "📋 Deployment Details:"
|
|
483
|
+
echo " Endpoint: ${ENDPOINT_NAME}"
|
|
484
|
+
echo " Endpoint Config: ${ENDPOINT_CONFIG_NAME}"
|
|
485
|
+
echo " Model: ${MODEL_NAME_SM}"
|
|
486
|
+
echo " Region: ${AWS_REGION}"
|
|
487
|
+
echo " Instance Type: ${INSTANCE_TYPE}"
|
|
488
|
+
echo " Image: ${ECR_REPOSITORY}:${IMAGE_TAG}"
|
|
489
|
+
echo " S3 Output: ${ASYNC_S3_OUTPUT_PATH}"
|
|
490
|
+
echo " SNS Success: ${ASYNC_SNS_SUCCESS_TOPIC}"
|
|
491
|
+
echo " SNS Error: ${ASYNC_SNS_ERROR_TOPIC}"
|
|
492
|
+
echo ""
|
|
493
|
+
echo "📋 What's next?"
|
|
494
|
+
echo " • Test your async endpoint: ./do/test"
|
|
495
|
+
echo " • Check async output: aws s3 ls ${ASYNC_S3_OUTPUT_PATH}"
|
|
496
|
+
<% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
|
|
497
|
+
echo " • Benchmark performance: ./do/benchmark"
|
|
498
|
+
<% } %>
|
|
499
|
+
echo " • Register this deployment: ./do/register"
|
|
500
|
+
echo " • View logs: ./do/logs"
|
|
501
|
+
echo " • Clean up when done: ./do/clean endpoint"
|