@aws/ml-container-creator 0.8.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE-THIRD-PARTY +50760 -16218
- package/bin/cli.js +31 -137
- package/package.json +7 -2
- package/servers/lib/catalogs/instances.json +52 -1275
- package/servers/lib/catalogs/models.json +0 -132
- package/servers/lib/catalogs/popular-diffusors.json +1 -110
- package/src/app.js +29 -2
- package/src/lib/config-manager.js +17 -0
- package/src/lib/generated/cli-options.js +467 -0
- package/src/lib/generated/validation-rules.js +202 -0
- package/src/lib/mcp-client.js +16 -1
- package/src/lib/mcp-command-handler.js +10 -2
- package/src/lib/prompt-runner.js +16 -2
- package/src/lib/train-config-parser.js +136 -0
- package/src/lib/train-config-persistence.js +143 -0
- package/src/lib/train-config-validator.js +112 -0
- package/src/lib/train-feedback.js +46 -0
- package/src/lib/train-idempotency.js +97 -0
- package/src/lib/train-request-builder.js +120 -0
- package/templates/code/serve +5 -134
- package/templates/code/serve.d/lmi.ejs +19 -0
- package/templates/code/serve.d/sglang.ejs +47 -0
- package/templates/code/serve.d/tensorrt-llm.ejs +53 -0
- package/templates/code/serve.d/vllm.ejs +48 -0
- package/templates/do/.train_build_request.py +141 -0
- package/templates/do/.train_poll_parser.py +135 -0
- package/templates/do/.train_status_parser.py +187 -0
- package/templates/do/clean +1 -1387
- package/templates/do/clean.d/async-inference.ejs +508 -0
- package/templates/do/clean.d/batch-transform.ejs +512 -0
- package/templates/do/clean.d/hyperpod-eks.ejs +481 -0
- package/templates/do/clean.d/managed-inference.ejs +1043 -0
- package/templates/do/deploy +1 -1766
- package/templates/do/deploy.d/async-inference.ejs +501 -0
- package/templates/do/deploy.d/batch-transform.ejs +529 -0
- package/templates/do/deploy.d/hyperpod-eks.ejs +339 -0
- package/templates/do/deploy.d/managed-inference.ejs +726 -0
- package/templates/do/lib/feedback.sh +41 -0
- package/templates/do/train +786 -0
- package/templates/do/training/config.yaml +140 -0
- package/templates/do/training/train.py +463 -0
package/templates/do/deploy
CHANGED
|
@@ -1,1766 +1 @@
|
|
|
1
|
-
|
|
2
|
-
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
3
|
-
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
-
|
|
5
|
-
set -e
|
|
6
|
-
set -u
|
|
7
|
-
set -o pipefail
|
|
8
|
-
|
|
9
|
-
# Parse flags
|
|
10
|
-
FORCE_NEW=false
|
|
11
|
-
FORCE_IC=false
|
|
12
|
-
IC_TARGET=""
|
|
13
|
-
while [ $# -gt 0 ]; do
|
|
14
|
-
case "$1" in
|
|
15
|
-
--force) FORCE_NEW=true; shift ;;
|
|
16
|
-
--force-ic)
|
|
17
|
-
FORCE_IC=true
|
|
18
|
-
shift
|
|
19
|
-
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
20
|
-
# Optional name argument: --force-ic <name>
|
|
21
|
-
if [ $# -gt 0 ] && [[ ! "$1" == --* ]]; then
|
|
22
|
-
IC_TARGET="$1"
|
|
23
|
-
shift
|
|
24
|
-
fi
|
|
25
|
-
<% } %>
|
|
26
|
-
;;
|
|
27
|
-
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
28
|
-
--ic)
|
|
29
|
-
if [ -z "${2:-}" ]; then
|
|
30
|
-
echo "❌ --ic requires a name argument"
|
|
31
|
-
echo " Usage: ./do/deploy --ic <name>"
|
|
32
|
-
exit 1
|
|
33
|
-
fi
|
|
34
|
-
IC_TARGET="$2"
|
|
35
|
-
shift 2
|
|
36
|
-
;;
|
|
37
|
-
<% } %>
|
|
38
|
-
--help|-h)
|
|
39
|
-
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
40
|
-
echo "Usage: ./do/deploy [--force] [--force-ic [<name>]] [--ic <name>]"
|
|
41
|
-
echo ""
|
|
42
|
-
echo "Options:"
|
|
43
|
-
echo " --force Create a new endpoint and IC, even if one already exists."
|
|
44
|
-
echo " --force-ic Recreate ALL inference components on the existing endpoint."
|
|
45
|
-
echo " --force-ic <name> Recreate only the named IC on the existing endpoint."
|
|
46
|
-
echo " --ic <name> Deploy only the named IC (from do/ic/<name>.conf)."
|
|
47
|
-
echo ""
|
|
48
|
-
echo "Without flags, deploy resumes from the last run."
|
|
49
|
-
<% } else { %>
|
|
50
|
-
echo "Usage: ./do/deploy [--force] [--force-ic]"
|
|
51
|
-
echo ""
|
|
52
|
-
echo "Options:"
|
|
53
|
-
echo " --force Create a new endpoint, even if one already exists."
|
|
54
|
-
echo " --force-ic Recreate the inference component on the existing endpoint."
|
|
55
|
-
echo ""
|
|
56
|
-
echo "Without flags, deploy resumes from the last run."
|
|
57
|
-
<% } %>
|
|
58
|
-
exit 0
|
|
59
|
-
;;
|
|
60
|
-
*)
|
|
61
|
-
echo "❌ Unknown option: $1"
|
|
62
|
-
echo " Run ./do/deploy --help for usage."
|
|
63
|
-
exit 1
|
|
64
|
-
;;
|
|
65
|
-
esac
|
|
66
|
-
done
|
|
67
|
-
|
|
68
|
-
# Source configuration
|
|
69
|
-
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
70
|
-
source "${SCRIPT_DIR}/config"
|
|
71
|
-
|
|
72
|
-
echo "🚀 Deploying to AWS"
|
|
73
|
-
echo " Project: ${PROJECT_NAME}"
|
|
74
|
-
echo " Deployment config: ${DEPLOYMENT_CONFIG}"
|
|
75
|
-
echo " Region: ${AWS_REGION}"
|
|
76
|
-
echo " Build target: ${BUILD_TARGET}"
|
|
77
|
-
echo " Deployment target: ${DEPLOYMENT_TARGET}"
|
|
78
|
-
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
79
|
-
if [ "${ENDPOINT_EXTERNAL:-false}" = "true" ]; then
|
|
80
|
-
echo " Endpoint: ${ENDPOINT_NAME} (external)"
|
|
81
|
-
else
|
|
82
|
-
echo " Instance type: ${INSTANCE_TYPE}"
|
|
83
|
-
fi
|
|
84
|
-
<% } else if (deploymentTarget === 'async-inference') { %>
|
|
85
|
-
echo " Instance type: ${INSTANCE_TYPE}"
|
|
86
|
-
echo " S3 output: ${ASYNC_S3_OUTPUT_PATH}"
|
|
87
|
-
echo " SNS success: ${ASYNC_SNS_SUCCESS_TOPIC}"
|
|
88
|
-
echo " SNS error: ${ASYNC_SNS_ERROR_TOPIC}"
|
|
89
|
-
<% if (asyncMaxConcurrentInvocations) { %>
|
|
90
|
-
echo " Max concurrent: ${ASYNC_MAX_CONCURRENT_INVOCATIONS}"
|
|
91
|
-
<% } %>
|
|
92
|
-
<% } else if (deploymentTarget === 'hyperpod-eks') { %>
|
|
93
|
-
echo " HyperPod cluster: ${HYPERPOD_CLUSTER_NAME}"
|
|
94
|
-
echo " Namespace: ${HYPERPOD_NAMESPACE}"
|
|
95
|
-
echo " Replicas: ${HYPERPOD_REPLICAS}"
|
|
96
|
-
<% } else if (deploymentTarget === 'batch-transform') { %>
|
|
97
|
-
echo " Instance type: ${INSTANCE_TYPE}"
|
|
98
|
-
echo " S3 input: ${BATCH_INPUT_PATH}"
|
|
99
|
-
echo " S3 output: ${BATCH_OUTPUT_PATH}"
|
|
100
|
-
echo " Instance count: ${BATCH_INSTANCE_COUNT}"
|
|
101
|
-
echo " Split type: ${BATCH_SPLIT_TYPE}"
|
|
102
|
-
echo " Strategy: ${BATCH_STRATEGY}"
|
|
103
|
-
<% } %>
|
|
104
|
-
|
|
105
|
-
# Check AWS credentials
|
|
106
|
-
echo "🔍 Validating AWS credentials..."
|
|
107
|
-
if ! aws sts get-caller-identity &> /dev/null; then
|
|
108
|
-
echo "❌ AWS credentials not configured"
|
|
109
|
-
echo " Run: aws configure"
|
|
110
|
-
echo " Or set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables"
|
|
111
|
-
exit 4
|
|
112
|
-
fi
|
|
113
|
-
|
|
114
|
-
AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
|
|
115
|
-
echo "✅ AWS credentials validated (Account: ${AWS_ACCOUNT_ID})"
|
|
116
|
-
|
|
117
|
-
# Construct ECR repository URL
|
|
118
|
-
ECR_REPOSITORY="${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/${ECR_REPOSITORY_NAME}"
|
|
119
|
-
|
|
120
|
-
# ============================================================
|
|
121
|
-
# Shared: Verify ECR image exists
|
|
122
|
-
# ============================================================
|
|
123
|
-
echo "🔍 Verifying ECR image exists..."
|
|
124
|
-
if ! aws ecr describe-images \
|
|
125
|
-
--repository-name "${ECR_REPOSITORY_NAME}" \
|
|
126
|
-
--image-ids imageTag="${PROJECT_NAME}-latest" \
|
|
127
|
-
--region "${AWS_REGION}" &> /dev/null; then
|
|
128
|
-
|
|
129
|
-
echo "❌ ECR image not found: ${ECR_REPOSITORY}:${PROJECT_NAME}-latest"
|
|
130
|
-
echo ""
|
|
131
|
-
echo "Please build and push your image first:"
|
|
132
|
-
echo " ./do/submit"
|
|
133
|
-
echo ""
|
|
134
|
-
echo "After the build completes successfully, run this deploy script again."
|
|
135
|
-
exit 4
|
|
136
|
-
fi
|
|
137
|
-
|
|
138
|
-
echo "✅ ECR image found: ${ECR_REPOSITORY}:${PROJECT_NAME}-latest"
|
|
139
|
-
IMAGE_TAG="${PROJECT_NAME}-latest"
|
|
140
|
-
|
|
141
|
-
# ============================================================
|
|
142
|
-
# Shared: Resolve secrets for container environment
|
|
143
|
-
# ============================================================
|
|
144
|
-
CONTAINER_ENV_JSON=""
|
|
145
|
-
|
|
146
|
-
if [ -n "${HF_TOKEN_ARN:-}" ]; then
|
|
147
|
-
echo "🔐 Resolving HuggingFace token from Secrets Manager..."
|
|
148
|
-
RESOLVED_HF_TOKEN=$(aws secretsmanager get-secret-value --secret-id "${HF_TOKEN_ARN}" --query SecretString --output text --region "${AWS_REGION}") || {
|
|
149
|
-
echo "❌ Failed to resolve HuggingFace token from Secrets Manager"
|
|
150
|
-
exit 3
|
|
151
|
-
}
|
|
152
|
-
CONTAINER_ENV_JSON="\"HF_TOKEN\":\"${RESOLVED_HF_TOKEN}\""
|
|
153
|
-
elif [ -n "${HF_TOKEN:-}" ]; then
|
|
154
|
-
CONTAINER_ENV_JSON="\"HF_TOKEN\":\"${HF_TOKEN}\""
|
|
155
|
-
fi
|
|
156
|
-
|
|
157
|
-
if [ -n "${NGC_API_KEY_ARN:-}" ]; then
|
|
158
|
-
echo "🔐 Resolving NGC API key from Secrets Manager..."
|
|
159
|
-
RESOLVED_NGC_KEY=$(aws secretsmanager get-secret-value --secret-id "${NGC_API_KEY_ARN}" --query SecretString --output text --region "${AWS_REGION}") || {
|
|
160
|
-
echo "❌ Failed to resolve NGC API key from Secrets Manager"
|
|
161
|
-
exit 3
|
|
162
|
-
}
|
|
163
|
-
if [ -n "${CONTAINER_ENV_JSON}" ]; then
|
|
164
|
-
CONTAINER_ENV_JSON="${CONTAINER_ENV_JSON},\"NGC_API_KEY\":\"${RESOLVED_NGC_KEY}\""
|
|
165
|
-
else
|
|
166
|
-
CONTAINER_ENV_JSON="\"NGC_API_KEY\":\"${RESOLVED_NGC_KEY}\""
|
|
167
|
-
fi
|
|
168
|
-
elif [ -n "${NGC_API_KEY:-}" ]; then
|
|
169
|
-
if [ -n "${CONTAINER_ENV_JSON}" ]; then
|
|
170
|
-
CONTAINER_ENV_JSON="${CONTAINER_ENV_JSON},\"NGC_API_KEY\":\"${NGC_API_KEY}\""
|
|
171
|
-
else
|
|
172
|
-
CONTAINER_ENV_JSON="\"NGC_API_KEY\":\"${NGC_API_KEY}\""
|
|
173
|
-
fi
|
|
174
|
-
fi
|
|
175
|
-
|
|
176
|
-
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
177
|
-
# ============================================================
|
|
178
|
-
# SageMaker Real-Time Inference Deployment (Inference Components)
|
|
179
|
-
# ============================================================
|
|
180
|
-
|
|
181
|
-
# Source shared helpers
|
|
182
|
-
source "${SCRIPT_DIR}/lib/secrets.sh"
|
|
183
|
-
source "${SCRIPT_DIR}/lib/wait.sh"
|
|
184
|
-
source "${SCRIPT_DIR}/lib/endpoint-config.sh"
|
|
185
|
-
source "${SCRIPT_DIR}/lib/inference-component.sh"
|
|
186
|
-
|
|
187
|
-
# Validate execution role ARN
|
|
188
|
-
if [ -z "${ROLE_ARN:-}" ]; then
|
|
189
|
-
echo "❌ Execution role ARN not provided"
|
|
190
|
-
echo ""
|
|
191
|
-
echo "Usage:"
|
|
192
|
-
echo " export ROLE_ARN=arn:aws:iam::ACCOUNT_ID:role/YOUR_ROLE"
|
|
193
|
-
echo " ./do/deploy"
|
|
194
|
-
echo ""
|
|
195
|
-
echo "Or set ROLE_ARN in do/config"
|
|
196
|
-
echo ""
|
|
197
|
-
echo "The execution role must have permissions for:"
|
|
198
|
-
echo " • SageMaker endpoint and inference component management"
|
|
199
|
-
echo " • ECR image access"
|
|
200
|
-
echo " • S3 access (if using model artifacts)"
|
|
201
|
-
echo " • CloudWatch Logs"
|
|
202
|
-
exit 3
|
|
203
|
-
fi
|
|
204
|
-
|
|
205
|
-
echo " Using execution role: ${ROLE_ARN}"
|
|
206
|
-
|
|
207
|
-
# Validate --ic argument if specified (set by --ic <name> or --force-ic <name>)
|
|
208
|
-
if [ -n "${IC_TARGET}" ]; then
|
|
209
|
-
if [ ! -d "${SCRIPT_DIR}/ic" ]; then
|
|
210
|
-
echo "❌ IC name specified but no do/ic/ directory found"
|
|
211
|
-
echo " This project does not use multi-IC configuration."
|
|
212
|
-
echo " Remove --ic/--force-ic <name> to deploy using the legacy single-IC path."
|
|
213
|
-
exit 1
|
|
214
|
-
fi
|
|
215
|
-
if [ ! -f "${SCRIPT_DIR}/ic/${IC_TARGET}.conf" ]; then
|
|
216
|
-
echo "❌ IC config not found: do/ic/${IC_TARGET}.conf"
|
|
217
|
-
echo ""
|
|
218
|
-
echo " Available ICs:"
|
|
219
|
-
for conf in "${SCRIPT_DIR}"/ic/*.conf; do
|
|
220
|
-
[ -f "${conf}" ] || continue
|
|
221
|
-
echo " • $(basename "${conf}" .conf)"
|
|
222
|
-
done
|
|
223
|
-
echo ""
|
|
224
|
-
echo " Usage: ./do/deploy --ic <name>"
|
|
225
|
-
exit 1
|
|
226
|
-
fi
|
|
227
|
-
fi
|
|
228
|
-
|
|
229
|
-
# Resolve container secrets (HF_TOKEN, NGC_API_KEY)
|
|
230
|
-
resolve_secrets
|
|
231
|
-
|
|
232
|
-
# ============================================================
|
|
233
|
-
# Idempotency: check for existing deployment from a previous run
|
|
234
|
-
# ============================================================
|
|
235
|
-
SKIP_TO=""
|
|
236
|
-
|
|
237
|
-
if [ "${FORCE_NEW}" = true ]; then
|
|
238
|
-
echo "🔄 --force: ignoring previous deployment, creating new resources."
|
|
239
|
-
elif [ "${FORCE_IC}" = true ] && [ -n "${ENDPOINT_NAME:-}" ]; then
|
|
240
|
-
EP_STATUS=$(_get_endpoint_status "${ENDPOINT_NAME}")
|
|
241
|
-
if [ "${EP_STATUS}" = "InService" ]; then
|
|
242
|
-
if [ -n "${IC_TARGET}" ]; then
|
|
243
|
-
echo "🔄 --force-ic: recreating IC '${IC_TARGET}' on existing endpoint: ${ENDPOINT_NAME}"
|
|
244
|
-
else
|
|
245
|
-
echo "🔄 --force-ic: recreating ALL inference components on existing endpoint: ${ENDPOINT_NAME}"
|
|
246
|
-
fi
|
|
247
|
-
SKIP_TO="create_ic"
|
|
248
|
-
else
|
|
249
|
-
echo "⚠️ --force-ic requires an InService endpoint, but ${ENDPOINT_NAME} is: ${EP_STATUS:-not found}"
|
|
250
|
-
echo " Use --force to create a new endpoint, or wait for the current one."
|
|
251
|
-
exit 4
|
|
252
|
-
fi
|
|
253
|
-
elif [ -n "${ENDPOINT_NAME:-}" ]; then
|
|
254
|
-
echo "🔍 Checking for existing deployment: ${ENDPOINT_NAME}"
|
|
255
|
-
|
|
256
|
-
EP_STATUS=$(_get_endpoint_status "${ENDPOINT_NAME}")
|
|
257
|
-
|
|
258
|
-
case "${EP_STATUS}" in
|
|
259
|
-
InService)
|
|
260
|
-
echo "✅ Endpoint already InService: ${ENDPOINT_NAME}"
|
|
261
|
-
|
|
262
|
-
# Check inference component
|
|
263
|
-
if [ -n "${INFERENCE_COMPONENT_NAME:-}" ]; then
|
|
264
|
-
IC_STATUS=$(_get_ic_status "${INFERENCE_COMPONENT_NAME}")
|
|
265
|
-
|
|
266
|
-
case "${IC_STATUS}" in
|
|
267
|
-
InService)
|
|
268
|
-
echo "✅ Inference component already InService: ${INFERENCE_COMPONENT_NAME}"
|
|
269
|
-
echo ""
|
|
270
|
-
echo "📋 Deployment is already live. Nothing to do."
|
|
271
|
-
echo " Endpoint: ${ENDPOINT_NAME}"
|
|
272
|
-
echo " Inference Component: ${INFERENCE_COMPONENT_NAME}"
|
|
273
|
-
echo ""
|
|
274
|
-
echo "🧪 Test your endpoint:"
|
|
275
|
-
echo " ./do/test"
|
|
276
|
-
echo ""
|
|
277
|
-
echo "🧹 Clean up when done:"
|
|
278
|
-
echo " ./do/clean endpoint"
|
|
279
|
-
exit 0
|
|
280
|
-
;;
|
|
281
|
-
Creating)
|
|
282
|
-
echo "⏳ Inference component still creating: ${INFERENCE_COMPONENT_NAME}"
|
|
283
|
-
SKIP_TO="wait_ic"
|
|
284
|
-
IC_DEPLOYED_NAME="${INFERENCE_COMPONENT_NAME}"
|
|
285
|
-
;;
|
|
286
|
-
Failed)
|
|
287
|
-
echo "⚠️ Inference component failed: ${INFERENCE_COMPONENT_NAME}"
|
|
288
|
-
echo " Will create a new inference component on the existing endpoint."
|
|
289
|
-
SKIP_TO="create_ic"
|
|
290
|
-
;;
|
|
291
|
-
*)
|
|
292
|
-
# Stored IC not found — check if a different IC is running on this endpoint
|
|
293
|
-
if [ "${ENDPOINT_EXTERNAL:-false}" = "true" ]; then
|
|
294
|
-
# External endpoint: never adopt ICs we didn't create
|
|
295
|
-
echo " Stored IC not found on external endpoint. Will create a new one."
|
|
296
|
-
SKIP_TO="create_ic"
|
|
297
|
-
else
|
|
298
|
-
LIVE_IC=$(_find_active_ic_on_endpoint "${ENDPOINT_NAME}")
|
|
299
|
-
if [ -n "${LIVE_IC}" ] && [ "${LIVE_IC}" != "None" ]; then
|
|
300
|
-
echo "✅ Found running inference component on endpoint: ${LIVE_IC}"
|
|
301
|
-
echo " (config had stale reference: ${INFERENCE_COMPONENT_NAME})"
|
|
302
|
-
_update_config_var "INFERENCE_COMPONENT_NAME" "${LIVE_IC}"
|
|
303
|
-
echo ""
|
|
304
|
-
echo "📋 Deployment is already live. Nothing to do."
|
|
305
|
-
echo " Endpoint: ${ENDPOINT_NAME}"
|
|
306
|
-
echo " Inference Component: ${LIVE_IC}"
|
|
307
|
-
echo ""
|
|
308
|
-
echo "🧪 Test your endpoint:"
|
|
309
|
-
echo " ./do/test"
|
|
310
|
-
echo ""
|
|
311
|
-
echo "🧹 Clean up when done:"
|
|
312
|
-
echo " ./do/clean endpoint"
|
|
313
|
-
exit 0
|
|
314
|
-
else
|
|
315
|
-
echo " No existing inference component found on endpoint. Will create one."
|
|
316
|
-
SKIP_TO="create_ic"
|
|
317
|
-
fi
|
|
318
|
-
fi
|
|
319
|
-
;;
|
|
320
|
-
esac
|
|
321
|
-
else
|
|
322
|
-
# No IC name in config — check if one is already running on the endpoint
|
|
323
|
-
if [ "${ENDPOINT_EXTERNAL:-false}" = "true" ]; then
|
|
324
|
-
# External endpoint: never adopt ICs we didn't create
|
|
325
|
-
echo " No previous IC deployed by this project. Will create a new one."
|
|
326
|
-
SKIP_TO="create_ic"
|
|
327
|
-
else
|
|
328
|
-
LIVE_IC=$(_find_active_ic_on_endpoint "${ENDPOINT_NAME}")
|
|
329
|
-
if [ -n "${LIVE_IC}" ] && [ "${LIVE_IC}" != "None" ]; then
|
|
330
|
-
echo "✅ Found running inference component on endpoint: ${LIVE_IC}"
|
|
331
|
-
_update_config_var "INFERENCE_COMPONENT_NAME" "${LIVE_IC}"
|
|
332
|
-
echo ""
|
|
333
|
-
echo "📋 Deployment is already live. Nothing to do."
|
|
334
|
-
echo " Endpoint: ${ENDPOINT_NAME}"
|
|
335
|
-
echo " Inference Component: ${LIVE_IC}"
|
|
336
|
-
echo ""
|
|
337
|
-
echo "🧪 Test your endpoint:"
|
|
338
|
-
echo " ./do/test"
|
|
339
|
-
echo ""
|
|
340
|
-
echo "🧹 Clean up when done:"
|
|
341
|
-
echo " ./do/clean endpoint"
|
|
342
|
-
exit 0
|
|
343
|
-
else
|
|
344
|
-
SKIP_TO="create_ic"
|
|
345
|
-
fi
|
|
346
|
-
fi
|
|
347
|
-
fi
|
|
348
|
-
;;
|
|
349
|
-
Creating|Updating)
|
|
350
|
-
echo "⏳ Endpoint still ${EP_STATUS}: ${ENDPOINT_NAME}"
|
|
351
|
-
SKIP_TO="wait_endpoint"
|
|
352
|
-
;;
|
|
353
|
-
Failed)
|
|
354
|
-
echo "⚠️ Previous endpoint failed: ${ENDPOINT_NAME}"
|
|
355
|
-
echo " Creating a new deployment. Clean up the failed endpoint with:"
|
|
356
|
-
echo " ./do/clean endpoint"
|
|
357
|
-
echo ""
|
|
358
|
-
# Fall through to create new resources
|
|
359
|
-
;;
|
|
360
|
-
"")
|
|
361
|
-
echo " Previous endpoint not found (may have been cleaned up). Creating new deployment."
|
|
362
|
-
;;
|
|
363
|
-
*)
|
|
364
|
-
echo " Endpoint in unexpected state: ${EP_STATUS}. Creating new deployment."
|
|
365
|
-
;;
|
|
366
|
-
esac
|
|
367
|
-
fi
|
|
368
|
-
|
|
369
|
-
# ============================================================
|
|
370
|
-
# Step 1: Create endpoint configuration and endpoint (skip if resuming)
|
|
371
|
-
# ============================================================
|
|
372
|
-
if [ -z "${SKIP_TO}" ]; then
|
|
373
|
-
if [ "${ENDPOINT_EXTERNAL:-false}" = "true" ]; then
|
|
374
|
-
# External endpoint: validate it still exists and is InService
|
|
375
|
-
echo "🔗 Using external endpoint: ${ENDPOINT_NAME}"
|
|
376
|
-
echo " Validating endpoint status..."
|
|
377
|
-
|
|
378
|
-
EP_STATUS=$(_get_endpoint_status "${ENDPOINT_NAME}")
|
|
379
|
-
|
|
380
|
-
if [ -z "${EP_STATUS}" ]; then
|
|
381
|
-
echo "❌ External endpoint not found: ${ENDPOINT_NAME}"
|
|
382
|
-
echo " The endpoint may have been deleted. Update ENDPOINT_NAME in do/config"
|
|
383
|
-
echo " or remove ENDPOINT_EXTERNAL=true to create a new endpoint."
|
|
384
|
-
exit 4
|
|
385
|
-
fi
|
|
386
|
-
|
|
387
|
-
if [ "${EP_STATUS}" != "InService" ]; then
|
|
388
|
-
echo "❌ External endpoint not InService: ${ENDPOINT_NAME} (status: ${EP_STATUS})"
|
|
389
|
-
echo " The endpoint must be InService before attaching inference components."
|
|
390
|
-
echo " Wait for the endpoint to become InService, or update do/config."
|
|
391
|
-
exit 4
|
|
392
|
-
fi
|
|
393
|
-
|
|
394
|
-
echo "✅ External endpoint is InService: ${ENDPOINT_NAME}"
|
|
395
|
-
# Skip directly to IC creation — no endpoint config, no endpoint creation, no wait
|
|
396
|
-
SKIP_TO="create_ic"
|
|
397
|
-
else
|
|
398
|
-
TIMESTAMP=$(date +%s)
|
|
399
|
-
ENDPOINT_NAME="${PROJECT_NAME}-endpoint-${TIMESTAMP}"
|
|
400
|
-
|
|
401
|
-
_update_config_var "ENDPOINT_NAME" "${ENDPOINT_NAME}"
|
|
402
|
-
|
|
403
|
-
# Create endpoint configuration via shared helper
|
|
404
|
-
create_endpoint_config
|
|
405
|
-
|
|
406
|
-
_update_config_var "ENDPOINT_CONFIG_NAME" "${ENDPOINT_CONFIG_NAME}"
|
|
407
|
-
|
|
408
|
-
# Record endpoint config in manifest (non-blocking)
|
|
409
|
-
ENDPOINT_CONFIG_ARN="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:endpoint-config/${ENDPOINT_CONFIG_NAME}"
|
|
410
|
-
./do/manifest add \
|
|
411
|
-
--type sagemaker-endpoint-config \
|
|
412
|
-
--id "${ENDPOINT_CONFIG_ARN}" \
|
|
413
|
-
--project "${PROJECT_NAME}" \
|
|
414
|
-
--meta "{\"endpointConfigName\":\"${ENDPOINT_CONFIG_NAME}\",\"instanceType\":\"${INSTANCE_TYPE}\",\"region\":\"${AWS_REGION}\"}" \
|
|
415
|
-
2>/dev/null || true
|
|
416
|
-
|
|
417
|
-
# Step 2: Create endpoint
|
|
418
|
-
echo "🚀 Creating endpoint: ${ENDPOINT_NAME}"
|
|
419
|
-
if ! aws sagemaker create-endpoint \
|
|
420
|
-
--endpoint-name "${ENDPOINT_NAME}" \
|
|
421
|
-
--endpoint-config-name "${ENDPOINT_CONFIG_NAME}" \
|
|
422
|
-
--region "${AWS_REGION}"; then
|
|
423
|
-
|
|
424
|
-
echo "❌ Failed to create endpoint"
|
|
425
|
-
echo " Check that:"
|
|
426
|
-
echo " • Your IAM credentials have sagemaker:CreateEndpoint permission"
|
|
427
|
-
echo " • You have sufficient service quota in region: ${AWS_REGION}"
|
|
428
|
-
exit 4
|
|
429
|
-
fi
|
|
430
|
-
|
|
431
|
-
echo "✅ Endpoint creation initiated: ${ENDPOINT_NAME}"
|
|
432
|
-
|
|
433
|
-
# Record endpoint in manifest (non-blocking)
|
|
434
|
-
ENDPOINT_ARN="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:endpoint/${ENDPOINT_NAME}"
|
|
435
|
-
./do/manifest add \
|
|
436
|
-
--type sagemaker-endpoint \
|
|
437
|
-
--id "${ENDPOINT_ARN}" \
|
|
438
|
-
--project "${PROJECT_NAME}" \
|
|
439
|
-
--meta "{\"endpointName\":\"${ENDPOINT_NAME}\",\"instanceType\":\"${INSTANCE_TYPE}\",\"region\":\"${AWS_REGION}\"}" \
|
|
440
|
-
2>/dev/null || true
|
|
441
|
-
fi
|
|
442
|
-
fi
|
|
443
|
-
|
|
444
|
-
# ============================================================
|
|
445
|
-
# Wait for endpoint (skip if already InService or external)
|
|
446
|
-
# ============================================================
|
|
447
|
-
if [ -z "${SKIP_TO}" ] || [ "${SKIP_TO}" = "wait_endpoint" ]; then
|
|
448
|
-
echo "⏳ Waiting for endpoint to reach InService status..."
|
|
449
|
-
echo " This may take a few minutes..."
|
|
450
|
-
echo " If this times out, re-run ./do/deploy to resume."
|
|
451
|
-
|
|
452
|
-
wait_endpoint "${ENDPOINT_NAME}"
|
|
453
|
-
|
|
454
|
-
echo "✅ Endpoint is InService: ${ENDPOINT_NAME}"
|
|
455
|
-
fi
|
|
456
|
-
|
|
457
|
-
# ============================================================
|
|
458
|
-
# Step 3: Deploy inference components (skip if resuming from wait_ic)
|
|
459
|
-
# ============================================================
|
|
460
|
-
if [ -z "${SKIP_TO}" ] || [ "${SKIP_TO}" = "create_ic" ] || [ "${SKIP_TO}" = "wait_endpoint" ]; then
|
|
461
|
-
|
|
462
|
-
if [ -d "${SCRIPT_DIR}/ic" ]; then
|
|
463
|
-
# _check_gpu_capacity
|
|
464
|
-
# Best-effort capacity guardrail: sums IC_GPU_COUNT across all do/ic/*.conf
|
|
465
|
-
# and compares against known GPU count for the instance type.
|
|
466
|
-
# Warns (does not error) if total exceeds instance capacity.
|
|
467
|
-
# Skips check if instance type is not in the known map.
|
|
468
|
-
_check_gpu_capacity() {
|
|
469
|
-
# Skip check if no INSTANCE_TYPE (external endpoints)
|
|
470
|
-
if [ -z "${INSTANCE_TYPE:-}" ]; then
|
|
471
|
-
return 0
|
|
472
|
-
fi
|
|
473
|
-
|
|
474
|
-
# Best-effort capacity guardrail: sums GPU requirements from base ICs only.
|
|
475
|
-
# NOTE: Only do/ic/*.conf files are counted. Adapter ICs (do/adapters/*.conf)
|
|
476
|
-
# share the base IC's GPU resources and have no ComputeResourceRequirements,
|
|
477
|
-
# so they are intentionally excluded from this capacity check.
|
|
478
|
-
#
|
|
479
|
-
# Hardcoded GPU counts for common SageMaker GPU instance types
|
|
480
|
-
local instance_gpus=""
|
|
481
|
-
case "${INSTANCE_TYPE}" in
|
|
482
|
-
ml.g4dn.xlarge) instance_gpus=1 ;;
|
|
483
|
-
ml.g4dn.12xlarge) instance_gpus=4 ;;
|
|
484
|
-
ml.g5.xlarge) instance_gpus=1 ;;
|
|
485
|
-
ml.g5.2xlarge) instance_gpus=1 ;;
|
|
486
|
-
ml.g5.4xlarge) instance_gpus=1 ;;
|
|
487
|
-
ml.g5.8xlarge) instance_gpus=1 ;;
|
|
488
|
-
ml.g5.12xlarge) instance_gpus=4 ;;
|
|
489
|
-
ml.g5.48xlarge) instance_gpus=8 ;;
|
|
490
|
-
ml.g6.xlarge) instance_gpus=1 ;;
|
|
491
|
-
ml.g6.12xlarge) instance_gpus=4 ;;
|
|
492
|
-
ml.g6.48xlarge) instance_gpus=8 ;;
|
|
493
|
-
ml.g6e.xlarge) instance_gpus=1 ;;
|
|
494
|
-
ml.g6e.2xlarge) instance_gpus=1 ;;
|
|
495
|
-
ml.g6e.4xlarge) instance_gpus=1 ;;
|
|
496
|
-
ml.g6e.8xlarge) instance_gpus=1 ;;
|
|
497
|
-
ml.g6e.12xlarge) instance_gpus=4 ;;
|
|
498
|
-
ml.g6e.48xlarge) instance_gpus=8 ;;
|
|
499
|
-
ml.g7e.xlarge) instance_gpus=1 ;;
|
|
500
|
-
ml.g7e.2xlarge) instance_gpus=1 ;;
|
|
501
|
-
ml.g7e.4xlarge) instance_gpus=1 ;;
|
|
502
|
-
ml.g7e.8xlarge) instance_gpus=1 ;;
|
|
503
|
-
ml.g7e.12xlarge) instance_gpus=4 ;;
|
|
504
|
-
ml.g7e.48xlarge) instance_gpus=8 ;;
|
|
505
|
-
ml.p3.2xlarge) instance_gpus=1 ;;
|
|
506
|
-
ml.p3.8xlarge) instance_gpus=4 ;;
|
|
507
|
-
ml.p3.16xlarge) instance_gpus=8 ;;
|
|
508
|
-
ml.p4d.24xlarge) instance_gpus=8 ;;
|
|
509
|
-
ml.p4de.24xlarge) instance_gpus=8 ;;
|
|
510
|
-
ml.p5.48xlarge) instance_gpus=8 ;;
|
|
511
|
-
*) instance_gpus="" ;;
|
|
512
|
-
esac
|
|
513
|
-
|
|
514
|
-
# Skip check if instance type not in map
|
|
515
|
-
if [ -z "${instance_gpus}" ]; then
|
|
516
|
-
return 0
|
|
517
|
-
fi
|
|
518
|
-
|
|
519
|
-
# Sum IC_GPU_COUNT across all IC config files
|
|
520
|
-
local total_gpu_requested=0
|
|
521
|
-
for conf in "${SCRIPT_DIR}"/ic/*.conf; do
|
|
522
|
-
[ -f "${conf}" ] || continue
|
|
523
|
-
local ic_gpus
|
|
524
|
-
ic_gpus=$(grep "^export IC_GPU_COUNT=" "${conf}" 2>/dev/null | sed 's/^export IC_GPU_COUNT=//' | tr -d '"' || echo "1")
|
|
525
|
-
if [ -z "${ic_gpus}" ]; then
|
|
526
|
-
ic_gpus=1
|
|
527
|
-
fi
|
|
528
|
-
total_gpu_requested=$(( total_gpu_requested + ic_gpus ))
|
|
529
|
-
done
|
|
530
|
-
|
|
531
|
-
if [ "${total_gpu_requested}" -gt "${instance_gpus}" ]; then
|
|
532
|
-
echo ""
|
|
533
|
-
echo "⚠️ GPU capacity warning: ICs request ${total_gpu_requested} GPUs total, but ${INSTANCE_TYPE} has ${instance_gpus} GPUs."
|
|
534
|
-
echo " SageMaker will likely reject IC creation if capacity is exceeded."
|
|
535
|
-
echo " Consider reducing IC_GPU_COUNT values or using a larger instance type."
|
|
536
|
-
echo ""
|
|
537
|
-
fi
|
|
538
|
-
}
|
|
539
|
-
|
|
540
|
-
# Run capacity guardrail before deploying ICs
|
|
541
|
-
_check_gpu_capacity
|
|
542
|
-
|
|
543
|
-
# _delete_and_wait_ic <ic_name>
|
|
544
|
-
# Deletes an inference component and waits for deletion to complete.
|
|
545
|
-
# Polls until the IC is no longer found (avoids name conflicts on recreate).
|
|
546
|
-
_delete_and_wait_ic() {
|
|
547
|
-
local ic_name="$1"
|
|
548
|
-
local delete_timeout=600 # 10 minutes max wait for deletion
|
|
549
|
-
|
|
550
|
-
echo "🗑️ Deleting inference component: ${ic_name}"
|
|
551
|
-
if ! aws sagemaker delete-inference-component \
|
|
552
|
-
--inference-component-name "${ic_name}" \
|
|
553
|
-
--region "${AWS_REGION}" 2>/dev/null; then
|
|
554
|
-
echo " ⚠️ Delete call failed (IC may already be gone). Continuing..."
|
|
555
|
-
return 0
|
|
556
|
-
fi
|
|
557
|
-
|
|
558
|
-
echo " Waiting for deletion to complete..."
|
|
559
|
-
local delete_start
|
|
560
|
-
delete_start=$(date +%s)
|
|
561
|
-
|
|
562
|
-
while true; do
|
|
563
|
-
local ic_status
|
|
564
|
-
ic_status=$(_get_ic_status "${ic_name}")
|
|
565
|
-
|
|
566
|
-
if [ -z "${ic_status}" ]; then
|
|
567
|
-
echo " ✅ Inference component deleted: ${ic_name}"
|
|
568
|
-
break
|
|
569
|
-
fi
|
|
570
|
-
|
|
571
|
-
local elapsed=$(( $(date +%s) - delete_start ))
|
|
572
|
-
if [ "${elapsed}" -ge "${delete_timeout}" ]; then
|
|
573
|
-
echo " ⚠️ Deletion timed out after ${delete_timeout}s. IC status: ${ic_status}"
|
|
574
|
-
echo " Proceeding anyway — SageMaker may reject the new IC if name conflicts."
|
|
575
|
-
break
|
|
576
|
-
fi
|
|
577
|
-
|
|
578
|
-
echo " $(date +%H:%M:%S) Deleting... (${ic_status}, ${elapsed}s elapsed)"
|
|
579
|
-
sleep 15
|
|
580
|
-
done
|
|
581
|
-
}
|
|
582
|
-
|
|
583
|
-
# _deploy_single_ic <conf_file>
|
|
584
|
-
# Deploys a single IC with per-IC idempotency:
|
|
585
|
-
# - If FORCE_IC is true: delete existing IC, clear state, create fresh
|
|
586
|
-
# - If IC_DEPLOYED_NAME is set and InService → skip
|
|
587
|
-
# - If IC_DEPLOYED_NAME is set and Creating → wait for it
|
|
588
|
-
# - If IC_DEPLOYED_NAME is set and Failed → recreate with new timestamp
|
|
589
|
-
# - If IC_DEPLOYED_NAME is not set → create new IC
|
|
590
|
-
# Fail-fast: exits immediately on failure.
|
|
591
|
-
_deploy_single_ic() {
|
|
592
|
-
local ic_conf="$1"
|
|
593
|
-
local ic_basename
|
|
594
|
-
ic_basename=$(basename "${ic_conf}" .conf)
|
|
595
|
-
|
|
596
|
-
# Source the IC config to check IC_DEPLOYED_NAME
|
|
597
|
-
# Use a subshell-safe approach: read the variable without polluting scope
|
|
598
|
-
local existing_ic_name=""
|
|
599
|
-
if grep -q "^export IC_DEPLOYED_NAME=" "${ic_conf}" 2>/dev/null; then
|
|
600
|
-
existing_ic_name=$(grep "^export IC_DEPLOYED_NAME=" "${ic_conf}" | sed 's/^export IC_DEPLOYED_NAME="//' | sed 's/"$//')
|
|
601
|
-
fi
|
|
602
|
-
|
|
603
|
-
# --force-ic: delete existing IC before recreating
|
|
604
|
-
if [ "${FORCE_IC}" = true ] && [ -n "${existing_ic_name}" ]; then
|
|
605
|
-
echo "🔄 --force-ic: recreating IC '${ic_basename}'"
|
|
606
|
-
_delete_and_wait_ic "${existing_ic_name}"
|
|
607
|
-
|
|
608
|
-
# Clear deployed state from config before recreating
|
|
609
|
-
_update_config_var "IC_DEPLOYED_NAME" "" "${ic_conf}"
|
|
610
|
-
_update_config_var "IC_DEPLOYED_AT" "" "${ic_conf}"
|
|
611
|
-
existing_ic_name=""
|
|
612
|
-
fi
|
|
613
|
-
|
|
614
|
-
if [ "${FORCE_IC}" = true ] && [ -z "${existing_ic_name}" ]; then
|
|
615
|
-
# Force mode with no existing IC — just create new
|
|
616
|
-
create_inference_component "${ic_conf}"
|
|
617
|
-
elif [ -n "${existing_ic_name}" ]; then
|
|
618
|
-
# IC was previously deployed — check its current status
|
|
619
|
-
local ic_status
|
|
620
|
-
ic_status=$(_get_ic_status "${existing_ic_name}")
|
|
621
|
-
|
|
622
|
-
case "${ic_status}" in
|
|
623
|
-
InService)
|
|
624
|
-
echo "✅ IC '${ic_basename}' already InService: ${existing_ic_name} — skipping"
|
|
625
|
-
IC_DEPLOYED_NAME="${existing_ic_name}"
|
|
626
|
-
return 0
|
|
627
|
-
;;
|
|
628
|
-
Creating)
|
|
629
|
-
echo "⏳ IC '${ic_basename}' is still Creating: ${existing_ic_name} — waiting..."
|
|
630
|
-
IC_DEPLOYED_NAME="${existing_ic_name}"
|
|
631
|
-
wait_ic "${IC_DEPLOYED_NAME}"
|
|
632
|
-
echo "✅ Inference component is InService: ${IC_DEPLOYED_NAME}"
|
|
633
|
-
return 0
|
|
634
|
-
;;
|
|
635
|
-
Failed)
|
|
636
|
-
echo "⚠️ IC '${ic_basename}' previously Failed: ${existing_ic_name} — recreating..."
|
|
637
|
-
create_inference_component "${ic_conf}"
|
|
638
|
-
;;
|
|
639
|
-
*)
|
|
640
|
-
echo " IC '${ic_basename}' has unknown/missing status for ${existing_ic_name} — creating new..."
|
|
641
|
-
create_inference_component "${ic_conf}"
|
|
642
|
-
;;
|
|
643
|
-
esac
|
|
644
|
-
else
|
|
645
|
-
# No previous deployment — create new IC
|
|
646
|
-
create_inference_component "${ic_conf}"
|
|
647
|
-
fi
|
|
648
|
-
|
|
649
|
-
echo "⏳ Waiting for inference component to reach InService status..."
|
|
650
|
-
echo " This may take 5-10 minutes..."
|
|
651
|
-
|
|
652
|
-
wait_ic "${IC_DEPLOYED_NAME}"
|
|
653
|
-
|
|
654
|
-
echo "✅ Inference component is InService: ${IC_DEPLOYED_NAME}"
|
|
655
|
-
|
|
656
|
-
# Record inference component in manifest (non-blocking)
|
|
657
|
-
local ic_arn="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${IC_DEPLOYED_NAME}"
|
|
658
|
-
./do/manifest add \
|
|
659
|
-
--type sagemaker-inference-component \
|
|
660
|
-
--id "${ic_arn}" \
|
|
661
|
-
--project "${PROJECT_NAME}" \
|
|
662
|
-
--meta "{\"inferenceComponentName\":\"${IC_DEPLOYED_NAME}\",\"endpointName\":\"${ENDPOINT_NAME}\",\"instanceType\":\"${INSTANCE_TYPE:-external}\",\"region\":\"${AWS_REGION}\"}" \
|
|
663
|
-
2>/dev/null || true
|
|
664
|
-
}
|
|
665
|
-
|
|
666
|
-
# Deploy either the single inference component named by IC_TARGET, or every
# config under ${SCRIPT_DIR}/ic/ in glob (alphabetical) order, stopping at the
# first component that fails.
if [ -n "${IC_TARGET}" ]; then
  # Single IC path: deploy only the named IC
  echo ""
  echo "── Deploying IC: ${IC_TARGET} ──"
  _deploy_single_ic "${SCRIPT_DIR}/ic/${IC_TARGET}.conf"
else
  # Multi-IC path: iterate all IC config files (alphabetical order)
  IC_SUMMARY=""
  IC_DEPLOY_FAILED=false

  for conf in "${SCRIPT_DIR}"/ic/*.conf; do
    # An unmatched glob stays literal; skip anything that is not a real file.
    [ -f "${conf}" ] || continue
    local_ic_basename=$(basename "${conf}" .conf)
    echo ""
    echo "── Deploying IC: ${local_ic_basename} ──"

    if ! _deploy_single_ic "${conf}"; then
      echo "❌ IC '${local_ic_basename}' failed to deploy. Stopping."
      # Store real newlines so the summary can be printed verbatim below.
      IC_SUMMARY+=" ${local_ic_basename}: FAILED"$'\n'
      IC_DEPLOY_FAILED=true
      break
    fi

    IC_SUMMARY+=" ${local_ic_basename}: ${IC_DEPLOYED_NAME} [InService]"$'\n'
  done

  # Print summary. printf '%s' emits the text as-is, so a backslash in an IC
  # config name is never interpreted as an escape sequence.
  echo ""
  echo "📋 IC Deployment Summary:"
  printf '%s\n' "${IC_SUMMARY}"

  if [ "${IC_DEPLOY_FAILED}" = true ]; then
    echo "❌ Deployment stopped due to IC failure. Fix the issue and re-run ./do/deploy to resume."
    exit 4
  fi
fi
|
|
702
|
-
else
|
|
703
|
-
# Legacy single-IC path: no do/ic/ directory
create_inference_component_legacy

printf '%s\n' \
  "⏳ Waiting for inference component to reach InService status..." \
  " This may take 5-10 minutes..." \
  " If this times out, re-run ./do/deploy to resume."

wait_ic "${IC_DEPLOYED_NAME}"

printf '%s\n' "✅ Inference component is InService: ${IC_DEPLOYED_NAME}"

# Record inference component in manifest (non-blocking): a failure of the
# manifest helper is deliberately ignored and its stderr discarded.
IC_ARN="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${IC_DEPLOYED_NAME}"
if ! ./do/manifest add \
  --type sagemaker-inference-component \
  --id "${IC_ARN}" \
  --project "${PROJECT_NAME}" \
  --meta "{\"inferenceComponentName\":\"${IC_DEPLOYED_NAME}\",\"endpointName\":\"${ENDPOINT_NAME}\",\"instanceType\":\"${INSTANCE_TYPE:-external}\",\"region\":\"${AWS_REGION}\"}" \
  2>/dev/null; then
  :
fi
|
|
722
|
-
fi
|
|
723
|
-
|
|
724
|
-
elif [ "${SKIP_TO}" = "wait_ic" ]; then
|
|
725
|
-
# Resuming: the inference component was already requested by an earlier run,
# so only block until it reports InService.
printf '%s\n' \
  "⏳ Waiting for inference component to reach InService status..." \
  " This may take 5-10 minutes..." \
  " If this times out, re-run ./do/deploy to resume."

wait_ic "${IC_DEPLOYED_NAME}"

printf '%s\n' "✅ Inference component is InService: ${IC_DEPLOYED_NAME}"
|
|
733
|
-
fi
|
|
734
|
-
|
|
735
|
-
echo "✅ Deployment complete!"
|
|
736
|
-
echo ""
|
|
737
|
-
echo "📋 Deployment Details:"
|
|
738
|
-
echo " Endpoint: ${ENDPOINT_NAME}"
|
|
739
|
-
if [ "${ENDPOINT_EXTERNAL:-false}" = "true" ]; then
|
|
740
|
-
echo " Endpoint Config: (external — not managed by this project)"
|
|
741
|
-
echo " Region: ${AWS_REGION}"
|
|
742
|
-
else
|
|
743
|
-
echo " Endpoint Config: ${ENDPOINT_CONFIG_NAME:-N/A}"
|
|
744
|
-
echo " Region: ${AWS_REGION}"
|
|
745
|
-
echo " Instance Type: ${INSTANCE_TYPE}"
|
|
746
|
-
fi
|
|
747
|
-
echo " Image: ${ECR_REPOSITORY}:${IMAGE_TAG}"
|
|
748
|
-
echo ""
|
|
749
|
-
echo "📋 What's next?"
|
|
750
|
-
echo " • Test your endpoint: ./do/test"
|
|
751
|
-
<% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
|
|
752
|
-
echo " • Benchmark performance: ./do/benchmark"
|
|
753
|
-
<% } %>
|
|
754
|
-
<% if (typeof enableLora !== 'undefined' && enableLora) { %>
|
|
755
|
-
echo " • Add a LoRA adapter: ./do/adapter add <name> --weights s3://..."
|
|
756
|
-
<% } %>
|
|
757
|
-
echo " • View endpoint status: ./do/status"
|
|
758
|
-
echo " • Register this deployment: ./do/register"
|
|
759
|
-
echo " • View logs: ./do/logs"
|
|
760
|
-
<% if (!(typeof existingEndpointName !== 'undefined' && existingEndpointName)) { %>
|
|
761
|
-
echo " • Clean up when done: ./do/clean endpoint"
|
|
762
|
-
<% } %>
|
|
763
|
-
|
|
764
|
-
<% } else if (deploymentTarget === 'async-inference') { %>
|
|
765
|
-
# ============================================================
|
|
766
|
-
# SageMaker Async Inference Deployment (Model-Based)
|
|
767
|
-
# SageMaker async inference does NOT support Inference Components.
|
|
768
|
-
# Flow: create-model → create-endpoint-config (with AsyncInferenceConfig) → create-endpoint
|
|
769
|
-
# ============================================================
|
|
770
|
-
|
|
771
|
-
# Load the shared helper libraries that live next to this script.
. "${SCRIPT_DIR}/lib/secrets.sh"
. "${SCRIPT_DIR}/lib/wait.sh"

# Resolve container secrets (HF_TOKEN, NGC_API_KEY)
resolve_secrets

# An execution role is mandatory: print usage guidance and stop with exit
# code 3 when ROLE_ARN is unset or empty.
if [[ -z "${ROLE_ARN:-}" ]]; then
  printf '%s\n' \
    "❌ Execution role ARN not provided" \
    "" \
    "Usage:" \
    " export ROLE_ARN=arn:aws:iam::ACCOUNT_ID:role/YOUR_ROLE" \
    " ./do/deploy" \
    "" \
    "Or set ROLE_ARN in do/config" \
    "" \
    "The execution role must have permissions for:" \
    " • SageMaker model and endpoint management" \
    " • ECR image access" \
    " • S3 write access for async output path: ${ASYNC_S3_OUTPUT_PATH}" \
    " • SNS publish permissions (optional, for notifications)" \
    " • CloudWatch Logs"
  exit 3
fi

printf '%s\n' " Using execution role: ${ROLE_ARN}"
|
|
798
|
-
|
|
799
|
-
# ============================================================
|
|
800
|
-
# Bootstrap async infrastructure (S3 bucket + SNS topics)
|
|
801
|
-
# ============================================================
|
|
802
|
-
|
|
803
|
-
# Extract bucket name from S3 output path
|
|
804
|
-
ASYNC_S3_BUCKET=$(echo "${ASYNC_S3_OUTPUT_PATH}" | sed 's|s3://||' | cut -d'/' -f1)
|
|
805
|
-
|
|
806
|
-
<% if (!asyncS3OutputPath) { %>
|
|
807
|
-
# Bootstrap default S3 bucket (check-and-create)
|
|
808
|
-
echo "🔍 Checking if S3 bucket exists: ${ASYNC_S3_BUCKET}"
|
|
809
|
-
if ! aws s3api head-bucket --bucket "${ASYNC_S3_BUCKET}" --region "${AWS_REGION}" 2>/dev/null; then
|
|
810
|
-
echo "📦 Creating S3 bucket: ${ASYNC_S3_BUCKET}"
|
|
811
|
-
if [ "${AWS_REGION}" = "us-east-1" ]; then
|
|
812
|
-
if ! aws s3api create-bucket \
|
|
813
|
-
--bucket "${ASYNC_S3_BUCKET}" \
|
|
814
|
-
--region "${AWS_REGION}"; then
|
|
815
|
-
echo "❌ Failed to create S3 bucket: ${ASYNC_S3_BUCKET}"
|
|
816
|
-
echo ""
|
|
817
|
-
echo " Check that:"
|
|
818
|
-
echo " • Your IAM credentials have s3:CreateBucket permission"
|
|
819
|
-
echo " • The bucket name is not already taken globally"
|
|
820
|
-
exit 4
|
|
821
|
-
fi
|
|
822
|
-
else
|
|
823
|
-
if ! aws s3api create-bucket \
|
|
824
|
-
--bucket "${ASYNC_S3_BUCKET}" \
|
|
825
|
-
--region "${AWS_REGION}" \
|
|
826
|
-
--create-bucket-configuration LocationConstraint="${AWS_REGION}"; then
|
|
827
|
-
echo "❌ Failed to create S3 bucket: ${ASYNC_S3_BUCKET}"
|
|
828
|
-
echo ""
|
|
829
|
-
echo " Check that:"
|
|
830
|
-
echo " • Your IAM credentials have s3:CreateBucket permission"
|
|
831
|
-
echo " • The bucket name is not already taken globally"
|
|
832
|
-
exit 4
|
|
833
|
-
fi
|
|
834
|
-
fi
|
|
835
|
-
echo "✅ S3 bucket created: ${ASYNC_S3_BUCKET}"
|
|
836
|
-
else
|
|
837
|
-
echo "✅ S3 bucket exists: ${ASYNC_S3_BUCKET}"
|
|
838
|
-
fi
|
|
839
|
-
<% } else { %>
|
|
840
|
-
# Custom S3 output path provided — skip bucket creation
|
|
841
|
-
echo "✅ Using custom S3 output path: ${ASYNC_S3_OUTPUT_PATH}"
|
|
842
|
-
<% } %>
|
|
843
|
-
|
|
844
|
-
# Extract topic name from SNS success topic ARN
|
|
845
|
-
ASYNC_SNS_SUCCESS_TOPIC_NAME=$(echo "${ASYNC_SNS_SUCCESS_TOPIC}" | awk -F: '{print $NF}')
|
|
846
|
-
|
|
847
|
-
<% if (!asyncSnsSuccessTopic) { %>
|
|
848
|
-
# Bootstrap default SNS success topic (check-and-create)
|
|
849
|
-
echo "🔍 Checking if SNS success topic exists: ${ASYNC_SNS_SUCCESS_TOPIC_NAME}"
|
|
850
|
-
if ! aws sns get-topic-attributes --topic-arn "${ASYNC_SNS_SUCCESS_TOPIC}" --region "${AWS_REGION}" 2>/dev/null; then
|
|
851
|
-
echo "📦 Creating SNS success topic: ${ASYNC_SNS_SUCCESS_TOPIC_NAME}"
|
|
852
|
-
if ! aws sns create-topic \
|
|
853
|
-
--name "${ASYNC_SNS_SUCCESS_TOPIC_NAME}" \
|
|
854
|
-
--region "${AWS_REGION}" > /dev/null; then
|
|
855
|
-
echo "❌ Failed to create SNS success topic: ${ASYNC_SNS_SUCCESS_TOPIC_NAME}"
|
|
856
|
-
echo ""
|
|
857
|
-
echo " Check that:"
|
|
858
|
-
echo " • Your IAM credentials have sns:CreateTopic permission"
|
|
859
|
-
exit 4
|
|
860
|
-
fi
|
|
861
|
-
echo "✅ SNS success topic created: ${ASYNC_SNS_SUCCESS_TOPIC_NAME}"
|
|
862
|
-
else
|
|
863
|
-
echo "✅ SNS success topic exists: ${ASYNC_SNS_SUCCESS_TOPIC_NAME}"
|
|
864
|
-
fi
|
|
865
|
-
|
|
866
|
-
# Record SNS success topic in manifest (non-blocking)
|
|
867
|
-
./do/manifest add \
|
|
868
|
-
--type sns-topic \
|
|
869
|
-
--id "${ASYNC_SNS_SUCCESS_TOPIC}" \
|
|
870
|
-
--project "${PROJECT_NAME}" \
|
|
871
|
-
--meta "{\"topicName\":\"${ASYNC_SNS_SUCCESS_TOPIC_NAME}\",\"purpose\":\"async-success\",\"region\":\"${AWS_REGION}\"}" \
|
|
872
|
-
2>/dev/null || true
|
|
873
|
-
|
|
874
|
-
<% } else { %>
|
|
875
|
-
# Custom SNS success topic ARN provided — skip topic creation
|
|
876
|
-
echo "✅ Using custom SNS success topic: ${ASYNC_SNS_SUCCESS_TOPIC}"
|
|
877
|
-
|
|
878
|
-
# Record SNS success topic in manifest (non-blocking)
|
|
879
|
-
ASYNC_SNS_SUCCESS_TOPIC_NAME=$(echo "${ASYNC_SNS_SUCCESS_TOPIC}" | awk -F: '{print $NF}')
|
|
880
|
-
./do/manifest add \
|
|
881
|
-
--type sns-topic \
|
|
882
|
-
--id "${ASYNC_SNS_SUCCESS_TOPIC}" \
|
|
883
|
-
--project "${PROJECT_NAME}" \
|
|
884
|
-
--meta "{\"topicName\":\"${ASYNC_SNS_SUCCESS_TOPIC_NAME}\",\"purpose\":\"async-success\",\"region\":\"${AWS_REGION}\"}" \
|
|
885
|
-
2>/dev/null || true
|
|
886
|
-
|
|
887
|
-
<% } %>
|
|
888
|
-
|
|
889
|
-
# Extract topic name from SNS error topic ARN
|
|
890
|
-
ASYNC_SNS_ERROR_TOPIC_NAME=$(echo "${ASYNC_SNS_ERROR_TOPIC}" | awk -F: '{print $NF}')
|
|
891
|
-
|
|
892
|
-
<% if (!asyncSnsErrorTopic) { %>
|
|
893
|
-
# Bootstrap default SNS error topic (check-and-create)
|
|
894
|
-
echo "🔍 Checking if SNS error topic exists: ${ASYNC_SNS_ERROR_TOPIC_NAME}"
|
|
895
|
-
if ! aws sns get-topic-attributes --topic-arn "${ASYNC_SNS_ERROR_TOPIC}" --region "${AWS_REGION}" 2>/dev/null; then
|
|
896
|
-
echo "📦 Creating SNS error topic: ${ASYNC_SNS_ERROR_TOPIC_NAME}"
|
|
897
|
-
if ! aws sns create-topic \
|
|
898
|
-
--name "${ASYNC_SNS_ERROR_TOPIC_NAME}" \
|
|
899
|
-
--region "${AWS_REGION}" > /dev/null; then
|
|
900
|
-
echo "❌ Failed to create SNS error topic: ${ASYNC_SNS_ERROR_TOPIC_NAME}"
|
|
901
|
-
echo ""
|
|
902
|
-
echo " Check that:"
|
|
903
|
-
echo " • Your IAM credentials have sns:CreateTopic permission"
|
|
904
|
-
exit 4
|
|
905
|
-
fi
|
|
906
|
-
echo "✅ SNS error topic created: ${ASYNC_SNS_ERROR_TOPIC_NAME}"
|
|
907
|
-
else
|
|
908
|
-
echo "✅ SNS error topic exists: ${ASYNC_SNS_ERROR_TOPIC_NAME}"
|
|
909
|
-
fi
|
|
910
|
-
|
|
911
|
-
# Record SNS error topic in manifest (non-blocking)
|
|
912
|
-
./do/manifest add \
|
|
913
|
-
--type sns-topic \
|
|
914
|
-
--id "${ASYNC_SNS_ERROR_TOPIC}" \
|
|
915
|
-
--project "${PROJECT_NAME}" \
|
|
916
|
-
--meta "{\"topicName\":\"${ASYNC_SNS_ERROR_TOPIC_NAME}\",\"purpose\":\"async-error\",\"region\":\"${AWS_REGION}\"}" \
|
|
917
|
-
2>/dev/null || true
|
|
918
|
-
|
|
919
|
-
<% } else { %>
|
|
920
|
-
# Custom SNS error topic ARN provided — skip topic creation
|
|
921
|
-
echo "✅ Using custom SNS error topic: ${ASYNC_SNS_ERROR_TOPIC}"
|
|
922
|
-
|
|
923
|
-
# Record SNS error topic in manifest (non-blocking)
|
|
924
|
-
ASYNC_SNS_ERROR_TOPIC_NAME=$(echo "${ASYNC_SNS_ERROR_TOPIC}" | awk -F: '{print $NF}')
|
|
925
|
-
./do/manifest add \
|
|
926
|
-
--type sns-topic \
|
|
927
|
-
--id "${ASYNC_SNS_ERROR_TOPIC}" \
|
|
928
|
-
--project "${PROJECT_NAME}" \
|
|
929
|
-
--meta "{\"topicName\":\"${ASYNC_SNS_ERROR_TOPIC_NAME}\",\"purpose\":\"async-error\",\"region\":\"${AWS_REGION}\"}" \
|
|
930
|
-
2>/dev/null || true
|
|
931
|
-
|
|
932
|
-
<% } %>
|
|
933
|
-
|
|
934
|
-
# ============================================================
|
|
935
|
-
# Create async endpoint (classic model-based flow)
|
|
936
|
-
# SageMaker async inference does NOT support Inference Components.
|
|
937
|
-
# Flow: create-model → create-endpoint-config (with AsyncInferenceConfig) → create-endpoint
|
|
938
|
-
# ============================================================
|
|
939
|
-
|
|
940
|
-
# ============================================================
|
|
941
|
-
# Idempotency: check for existing deployment from a previous run
|
|
942
|
-
# ============================================================
|
|
943
|
-
# Decide how much of the deployment still has to run.
#   SKIP_TO=""              -> create all resources
#   SKIP_TO="wait_endpoint" -> endpoint already being created/updated; only wait
# An endpoint that is already InService ends the script successfully here.
SKIP_TO=""

if [[ "${FORCE_NEW}" == true ]]; then
  printf '%s\n' "🔄 --force: ignoring previous deployment, creating new resources."
elif [[ -n "${ENDPOINT_NAME:-}" ]]; then
  printf '%s\n' "🔍 Checking for existing deployment: ${ENDPOINT_NAME}"

  EP_STATUS=$(_get_endpoint_status "${ENDPOINT_NAME}")

  case "${EP_STATUS}" in
    InService)
      printf '%s\n' \
        "✅ Async endpoint already InService: ${ENDPOINT_NAME}" \
        "" \
        "📋 Deployment is already live. Nothing to do." \
        " Endpoint: ${ENDPOINT_NAME}" \
        "" \
        "🧪 Test your async endpoint:" \
        " ./do/test" \
        "" \
        "🧹 Clean up when done:" \
        " ./do/clean endpoint"
      exit 0
      ;;
    Creating | Updating)
      printf '%s\n' "⏳ Endpoint still ${EP_STATUS}: ${ENDPOINT_NAME}"
      SKIP_TO="wait_endpoint"
      ;;
    Failed)
      # Fall through to a fresh deployment; the failed endpoint is left for
      # the user to remove.
      printf '%s\n' \
        "⚠️ Previous endpoint failed: ${ENDPOINT_NAME}" \
        " Creating a new deployment. Clean up the failed endpoint with:" \
        " ./do/clean endpoint" \
        ""
      ;;
    "")
      printf '%s\n' " Previous endpoint not found (may have been cleaned up). Creating new deployment."
      ;;
    *)
      printf '%s\n' " Endpoint in unexpected state: ${EP_STATUS}. Creating new deployment."
      ;;
  esac
fi
|
|
984
|
-
|
|
985
|
-
# ============================================================
|
|
986
|
-
# Create async resources (skip if resuming from wait)
|
|
987
|
-
# ============================================================
|
|
988
|
-
# Create the three SageMaker resources for an async endpoint, in order:
# model -> endpoint config (with AsyncInferenceConfig) -> endpoint.
# Skipped entirely when a previous run already started the endpoint.
if [[ -z "${SKIP_TO}" ]]; then
  # All resource names share one epoch-seconds suffix.
  TIMESTAMP=$(date +%s)
  MODEL_NAME_SM="${PROJECT_NAME}-async-model-${TIMESTAMP}"
  ENDPOINT_CONFIG_NAME="${PROJECT_NAME}-async-epc-${TIMESTAMP}"
  ENDPOINT_NAME="${PROJECT_NAME}-async-ep-${TIMESTAMP}"

  # Persist the names before anything is created.
  _update_config_var "ENDPOINT_NAME" "${ENDPOINT_NAME}"
  _update_config_var "ENDPOINT_CONFIG_NAME" "${ENDPOINT_CONFIG_NAME}"
  _update_config_var "SAGEMAKER_MODEL_NAME" "${MODEL_NAME_SM}"

  # Step 1: Create SageMaker model
  # Primary container spec: image, plus Environment when env JSON is present.
  printf -v PRIMARY_CONTAINER '{"Image":"%s:%s"' "${ECR_REPOSITORY}" "${IMAGE_TAG}"
  if [[ -n "${CONTAINER_ENV_JSON}" ]]; then
    PRIMARY_CONTAINER+=",\"Environment\":{${CONTAINER_ENV_JSON}}"
  fi
  PRIMARY_CONTAINER+="}"

  printf '%s\n' "📦 Creating SageMaker model: ${MODEL_NAME_SM}"
  aws sagemaker create-model \
    --model-name "${MODEL_NAME_SM}" \
    --primary-container "${PRIMARY_CONTAINER}" \
    --execution-role-arn "${ROLE_ARN}" \
    --region "${AWS_REGION}" || {
    printf '%s\n' \
      "❌ Failed to create SageMaker model" \
      " Check that:" \
      " • The execution role ARN is valid" \
      " • The ECR image exists and is accessible" \
      " • The IAM role has ecr:GetDownloadUrlForLayer permission"
    exit 4
  }

  printf '%s\n' "✅ SageMaker model created: ${MODEL_NAME_SM}"

  # Record model in manifest (non-blocking)
  MODEL_ARN="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:model/${MODEL_NAME_SM}"
  ./do/manifest add \
    --type sagemaker-model \
    --id "${MODEL_ARN}" \
    --project "${PROJECT_NAME}" \
    --meta "{\"modelName\":\"${MODEL_NAME_SM}\",\"region\":\"${AWS_REGION}\"}" \
    2>/dev/null || true

  # Production variant JSON (classic flow: the variant names the model; the
  # endpoint config itself takes no execution role).
  printf -v VARIANT_JSON \
    '[{"VariantName":"AllTraffic","ModelName":"%s","InstanceType":"%s","InitialInstanceCount":1' \
    "${MODEL_NAME_SM}" "${INSTANCE_TYPE}"

  # Append InferenceAmiVersion if configured
  if [[ -n "${INFERENCE_AMI_VERSION:-}" ]]; then
    VARIANT_JSON+=",\"InferenceAmiVersion\":\"${INFERENCE_AMI_VERSION}\""
    printf '%s\n' " AMI version: ${INFERENCE_AMI_VERSION}"
  fi

  VARIANT_JSON+="}]"

  # AsyncInferenceConfig JSON: output location and notification topics, plus
  # an optional per-instance concurrency cap.
  printf -v ASYNC_CONFIG \
    '{"OutputConfig":{"S3OutputPath":"%s","NotificationConfig":{"SuccessTopic":"%s","ErrorTopic":"%s"}}' \
    "${ASYNC_S3_OUTPUT_PATH}" "${ASYNC_SNS_SUCCESS_TOPIC}" "${ASYNC_SNS_ERROR_TOPIC}"

  if [[ -n "${ASYNC_MAX_CONCURRENT_INVOCATIONS:-}" ]]; then
    ASYNC_CONFIG+=",\"ClientConfig\":{\"MaxConcurrentInvocationsPerInstance\":${ASYNC_MAX_CONCURRENT_INVOCATIONS}}"
  fi

  ASYNC_CONFIG+="}"

  # Step 2: Create endpoint configuration with AsyncInferenceConfig (no --execution-role-arn)
  printf '%s\n' "⚙️ Creating async endpoint configuration: ${ENDPOINT_CONFIG_NAME}"
  aws sagemaker create-endpoint-config \
    --endpoint-config-name "${ENDPOINT_CONFIG_NAME}" \
    --production-variants "${VARIANT_JSON}" \
    --async-inference-config "${ASYNC_CONFIG}" \
    --region "${AWS_REGION}" || {
    printf '%s\n' \
      "❌ Failed to create async endpoint configuration" \
      " Check that:" \
      " • The S3 output path is accessible: ${ASYNC_S3_OUTPUT_PATH}" \
      " • The IAM role has s3:PutObject permission on the output path" \
      " • The instance type is valid: ${INSTANCE_TYPE}" \
      " • The instance type is available in region: ${AWS_REGION}" \
      " • You have sufficient service quota for the instance type"
    exit 4
  }

  printf '%s\n' "✅ Async endpoint configuration created: ${ENDPOINT_CONFIG_NAME}"

  # Record endpoint config in manifest (non-blocking)
  ENDPOINT_CONFIG_ARN="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:endpoint-config/${ENDPOINT_CONFIG_NAME}"
  ./do/manifest add \
    --type sagemaker-endpoint-config \
    --id "${ENDPOINT_CONFIG_ARN}" \
    --project "${PROJECT_NAME}" \
    --meta "{\"endpointConfigName\":\"${ENDPOINT_CONFIG_NAME}\",\"instanceType\":\"${INSTANCE_TYPE}\",\"region\":\"${AWS_REGION}\"}" \
    2>/dev/null || true

  # Step 3: Create endpoint
  printf '%s\n' "🚀 Creating async endpoint: ${ENDPOINT_NAME}"
  aws sagemaker create-endpoint \
    --endpoint-name "${ENDPOINT_NAME}" \
    --endpoint-config-name "${ENDPOINT_CONFIG_NAME}" \
    --region "${AWS_REGION}" || {
    printf '%s\n' \
      "❌ Failed to create async endpoint" \
      " Check that:" \
      " • Your IAM credentials have sagemaker:CreateEndpoint permission" \
      " • You have sufficient service quota in region: ${AWS_REGION}"
    exit 4
  }

  printf '%s\n' "✅ Async endpoint creation initiated: ${ENDPOINT_NAME}"

  # Record endpoint in manifest (non-blocking)
  ENDPOINT_ARN="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:endpoint/${ENDPOINT_NAME}"
  ./do/manifest add \
    --type sagemaker-endpoint \
    --id "${ENDPOINT_ARN}" \
    --project "${PROJECT_NAME}" \
    --meta "{\"endpointName\":\"${ENDPOINT_NAME}\",\"instanceType\":\"${INSTANCE_TYPE}\",\"region\":\"${AWS_REGION}\"}" \
    2>/dev/null || true
fi
|
|
1106
|
-
|
|
1107
|
-
# ============================================================
|
|
1108
|
-
# Wait for endpoint (skip if already InService)
|
|
1109
|
-
# ============================================================
|
|
1110
|
-
# Block until the endpoint is InService, both after a fresh create
# (SKIP_TO empty) and when resuming an in-flight one.
case "${SKIP_TO}" in
  "" | wait_endpoint)
    printf '%s\n' \
      "⏳ Waiting for async endpoint to reach InService status..." \
      " This may take several minutes..." \
      " If this times out, re-run ./do/deploy to resume."

    wait_endpoint "${ENDPOINT_NAME}"
    ;;
esac
|
|
1117
|
-
|
|
1118
|
-
echo "✅ Async deployment complete!"
|
|
1119
|
-
echo ""
|
|
1120
|
-
echo "📋 Deployment Details:"
|
|
1121
|
-
echo " Endpoint: ${ENDPOINT_NAME}"
|
|
1122
|
-
echo " Endpoint Config: ${ENDPOINT_CONFIG_NAME}"
|
|
1123
|
-
echo " Model: ${MODEL_NAME_SM}"
|
|
1124
|
-
echo " Region: ${AWS_REGION}"
|
|
1125
|
-
echo " Instance Type: ${INSTANCE_TYPE}"
|
|
1126
|
-
echo " Image: ${ECR_REPOSITORY}:${IMAGE_TAG}"
|
|
1127
|
-
echo " S3 Output: ${ASYNC_S3_OUTPUT_PATH}"
|
|
1128
|
-
echo " SNS Success: ${ASYNC_SNS_SUCCESS_TOPIC}"
|
|
1129
|
-
echo " SNS Error: ${ASYNC_SNS_ERROR_TOPIC}"
|
|
1130
|
-
echo ""
|
|
1131
|
-
echo "📋 What's next?"
|
|
1132
|
-
echo " • Test your async endpoint: ./do/test"
|
|
1133
|
-
echo " • Check async output: aws s3 ls ${ASYNC_S3_OUTPUT_PATH}"
|
|
1134
|
-
<% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
|
|
1135
|
-
echo " • Benchmark performance: ./do/benchmark"
|
|
1136
|
-
<% } %>
|
|
1137
|
-
echo " • Register this deployment: ./do/register"
|
|
1138
|
-
echo " • View logs: ./do/logs"
|
|
1139
|
-
echo " • Clean up when done: ./do/clean endpoint"
|
|
1140
|
-
|
|
1141
|
-
<% } else if (deploymentTarget === 'hyperpod-eks') { %>
|
|
1142
|
-
# ============================================================
|
|
1143
|
-
# HyperPod EKS Deployment
|
|
1144
|
-
# ============================================================
|
|
1145
|
-
|
|
1146
|
-
# Get kubeconfig for HyperPod cluster: resolve the EKS cluster behind the
# HyperPod cluster, write a dedicated kubeconfig for it, and export KUBECONFIG
# so later kubectl calls use it.
echo "🔑 Configuring kubectl for HyperPod cluster..."
KUBECONFIG_PATH="${HOME}/.kube/hyperpod-${HYPERPOD_CLUSTER_NAME}"

# Step 1: Describe the HyperPod cluster to get the underlying EKS cluster ARN.
# stderr is captured too, so on failure the variable holds the CLI error text.
EKS_CLUSTER_ARN=$(aws sagemaker describe-cluster \
  --cluster-name "${HYPERPOD_CLUSTER_NAME}" \
  --region "${AWS_REGION}" \
  --query "Orchestrator.Eks.ClusterArn" \
  --output text 2>&1) || {
  echo "❌ Failed to describe HyperPod cluster: ${HYPERPOD_CLUSTER_NAME}"
  echo ""
  echo " Error details:"
  echo " ${EKS_CLUSTER_ARN}"
  echo ""
  echo " Check that:"
  echo " • The cluster name is correct"
  echo " • The cluster exists in region: ${AWS_REGION}"
  echo " • Your IAM user/role has permission to access the cluster"
  echo ""
  echo " Required IAM permissions:"
  echo " • sagemaker:DescribeCluster"
  echo " • eks:DescribeCluster"
  exit 4
}

# With --output text a null query result is printed as the literal "None".
# Stop here with a clear message instead of passing "None" on as a cluster name.
case "${EKS_CLUSTER_ARN}" in
  "" | None)
    echo "❌ HyperPod cluster has no EKS orchestrator: ${HYPERPOD_CLUSTER_NAME}"
    echo ""
    echo " Check that:"
    echo " • The cluster is orchestrated by Amazon EKS"
    exit 4
    ;;
esac

# Step 2: Extract the EKS cluster name from the ARN (text after the last '/')
EKS_CLUSTER_NAME="${EKS_CLUSTER_ARN##*/}"
echo " HyperPod cluster: ${HYPERPOD_CLUSTER_NAME}"
echo " EKS cluster: ${EKS_CLUSTER_NAME}"

# Step 3: Update kubeconfig using the EKS cluster
if ! aws eks update-kubeconfig \
  --name "${EKS_CLUSTER_NAME}" \
  --region "${AWS_REGION}" \
  --kubeconfig "${KUBECONFIG_PATH}" 2>&1; then
  echo "❌ Failed to configure kubectl for EKS cluster: ${EKS_CLUSTER_NAME}"
  echo ""
  echo " Required IAM permissions:"
  echo " • eks:DescribeCluster"
  echo " • eks:AccessKubernetesApi"
  exit 4
fi

export KUBECONFIG="${KUBECONFIG_PATH}"
echo "✅ Kubeconfig saved to: ${KUBECONFIG_PATH}"
|
|
1192
|
-
|
|
1193
|
-
# Verify cluster connectivity: any failure of `kubectl cluster-info` is fatal.
printf '%s\n' "🔍 Verifying cluster connectivity..."
kubectl cluster-info &> /dev/null || {
  printf '%s\n' \
    "❌ Cannot connect to HyperPod cluster" \
    "" \
    " Check that:" \
    " • The cluster is in 'InService' status" \
    " • Your network can reach the cluster API server" \
    " • Your IAM credentials are valid"
  exit 4
}
printf '%s\n' "✅ Connected to HyperPod cluster"

# Create namespace if it doesn't exist. Rendering the namespace client-side
# and piping it into `kubectl apply` succeeds whether or not it already
# exists; a failure only produces a warning.
printf '%s\n' "📁 Ensuring namespace exists: ${HYPERPOD_NAMESPACE}"
kubectl create namespace "${HYPERPOD_NAMESPACE}" --dry-run=client -o yaml \
  | kubectl apply -f - 2>&1 \
  || printf '%s\n' "⚠️ Warning: Could not create/verify namespace"
|
|
1211
|
-
|
|
1212
|
-
# Apply Kubernetes manifests
|
|
1213
|
-
echo "📄 Applying Kubernetes manifests from hyperpod/..."
|
|
1214
|
-
|
|
1215
|
-
# Substitute shell variables (e.g. ${AWS_ACCOUNT_ID}) in manifests before applying
|
|
1216
|
-
export AWS_ACCOUNT_ID
|
|
1217
|
-
export ECR_IMAGE="${ECR_REPOSITORY}:${IMAGE_TAG}"
|
|
1218
|
-
|
|
1219
|
-
# Render every manifest under hyperpod/ with envsubst and pipe the result to
# `kubectl apply`. The output of each apply is accumulated in APPLY_OUTPUT and
# the status of the most recent failing apply is kept in APPLY_EXIT_CODE; both
# are read by the error report that follows this loop.
APPLY_OUTPUT=""
APPLY_EXIT_CODE=0
for manifest in hyperpod/*.yaml; do
  # An unmatched glob stays literal; skip it instead of failing the redirect.
  [ -e "${manifest}" ] || continue

  RENDERED=$(envsubst < "${manifest}")

  # Skip files that contain no Kubernetes objects (e.g. comment-only PVC stubs)
  if ! grep -q '^kind:' <<< "${RENDERED}"; then
    continue
  fi

  # Keep going after a failure so every manifest's diagnostics are collected.
  FILE_OUTPUT=$(kubectl apply -n "${HYPERPOD_NAMESPACE}" -f - 2>&1 <<< "${RENDERED}") || {
    APPLY_EXIT_CODE=$?
  }

  # Join with a real newline: "\n" inside double quotes is two literal
  # characters, and the plain `echo` used for the report prints them verbatim.
  APPLY_OUTPUT="${APPLY_OUTPUT}${FILE_OUTPUT}"$'\n'
done
|
|
1231
|
-
|
|
1232
|
-
if [ "${APPLY_EXIT_CODE}" -ne 0 ]; then
|
|
1233
|
-
echo ""
|
|
1234
|
-
echo "❌ Failed to apply Kubernetes manifests"
|
|
1235
|
-
echo ""
|
|
1236
|
-
echo " Error details:"
|
|
1237
|
-
echo " ${APPLY_OUTPUT}"
|
|
1238
|
-
echo ""
|
|
1239
|
-
echo " Common issues:"
|
|
1240
|
-
echo " • Insufficient node capacity - check available GPU nodes"
|
|
1241
|
-
echo " • Resource requests exceed node capacity"
|
|
1242
|
-
echo " • RBAC permissions - ensure you have permission to create resources in namespace '${HYPERPOD_NAMESPACE}'"
|
|
1243
|
-
echo " • Invalid manifest syntax"
|
|
1244
|
-
<% if (fsxVolumeHandle) { %>
|
|
1245
|
-
echo " • PVC creation failure - verify the FSx CSI driver is installed on the cluster"
|
|
1246
|
-
echo " kubectl get csidriver -o name | grep fsx"
|
|
1247
|
-
<% } %>
|
|
1248
|
-
echo ""
|
|
1249
|
-
echo " Debug commands:"
|
|
1250
|
-
echo " kubectl get nodes -o wide"
|
|
1251
|
-
echo " kubectl describe nodes"
|
|
1252
|
-
echo " kubectl get events -n ${HYPERPOD_NAMESPACE}"
|
|
1253
|
-
exit ${APPLY_EXIT_CODE}
|
|
1254
|
-
fi
|
|
1255
|
-
|
|
1256
|
-
echo "✅ Kubernetes manifests applied"
|
|
1257
|
-
|
|
1258
|
-
# Record k8s deployment and service in manifest (non-blocking)
|
|
1259
|
-
./do/manifest add \
|
|
1260
|
-
--type k8s-deployment \
|
|
1261
|
-
--id "${HYPERPOD_NAMESPACE}/${PROJECT_NAME}" \
|
|
1262
|
-
--project "${PROJECT_NAME}" \
|
|
1263
|
-
--meta "{\"namespace\":\"${HYPERPOD_NAMESPACE}\",\"deploymentName\":\"${PROJECT_NAME}\",\"clusterName\":\"${HYPERPOD_CLUSTER_NAME}\",\"region\":\"${AWS_REGION}\"}" \
|
|
1264
|
-
2>/dev/null || true
|
|
1265
|
-
|
|
1266
|
-
./do/manifest add \
|
|
1267
|
-
--type k8s-service \
|
|
1268
|
-
--id "${HYPERPOD_NAMESPACE}/${PROJECT_NAME}" \
|
|
1269
|
-
--project "${PROJECT_NAME}" \
|
|
1270
|
-
--meta "{\"namespace\":\"${HYPERPOD_NAMESPACE}\",\"serviceName\":\"${PROJECT_NAME}\",\"clusterName\":\"${HYPERPOD_CLUSTER_NAME}\",\"region\":\"${AWS_REGION}\"}" \
|
|
1271
|
-
2>/dev/null || true
|
|
1272
|
-
|
|
1273
|
-
# Wait for deployment to be ready
|
|
1274
|
-
DEPLOY_TIMEOUT=${DEPLOY_TIMEOUT:-1200}
|
|
1275
|
-
echo "⏳ Waiting for deployment to be ready (timeout: ${DEPLOY_TIMEOUT}s)..."
|
|
1276
|
-
echo " This may take several minutes for GPU workloads..."
|
|
1277
|
-
echo ""
|
|
1278
|
-
|
|
1279
|
-
# Poll pod status every 30s while rollout is in progress
|
|
1280
|
-
(
|
|
1281
|
-
while true; do
|
|
1282
|
-
sleep 30
|
|
1283
|
-
POD_STATUS=$(kubectl get pods -n "${HYPERPOD_NAMESPACE}" -l app=${PROJECT_NAME} \
|
|
1284
|
-
--no-headers 2>/dev/null | head -5)
|
|
1285
|
-
if [ -n "${POD_STATUS}" ]; then
|
|
1286
|
-
echo " 📊 $(date +%H:%M:%S) Pod status:"
|
|
1287
|
-
echo "${POD_STATUS}" | while read -r line; do echo " ${line}"; done
|
|
1288
|
-
fi
|
|
1289
|
-
done
|
|
1290
|
-
) &
|
|
1291
|
-
STATUS_PID=$!
|
|
1292
|
-
trap "kill ${STATUS_PID} 2>/dev/null; wait ${STATUS_PID} 2>/dev/null" EXIT
|
|
1293
|
-
|
|
1294
|
-
ROLLOUT_OUTPUT=$(kubectl rollout status deployment/${PROJECT_NAME} -n "${HYPERPOD_NAMESPACE}" --timeout=${DEPLOY_TIMEOUT}s 2>&1) || {
|
|
1295
|
-
ROLLOUT_EXIT_CODE=$?
|
|
1296
|
-
kill ${STATUS_PID} 2>/dev/null
|
|
1297
|
-
echo ""
|
|
1298
|
-
echo "❌ Deployment failed to become ready within timeout"
|
|
1299
|
-
echo ""
|
|
1300
|
-
echo " Error details:"
|
|
1301
|
-
echo " ${ROLLOUT_OUTPUT}"
|
|
1302
|
-
echo ""
|
|
1303
|
-
echo " Current pod state:"
|
|
1304
|
-
kubectl get pods -n "${HYPERPOD_NAMESPACE}" -l app=${PROJECT_NAME} -o wide 2>/dev/null
|
|
1305
|
-
echo ""
|
|
1306
|
-
echo " Debug commands:"
|
|
1307
|
-
echo " kubectl describe pods -n ${HYPERPOD_NAMESPACE} -l app=${PROJECT_NAME}"
|
|
1308
|
-
echo " kubectl logs -n ${HYPERPOD_NAMESPACE} -l app=${PROJECT_NAME} --tail=100"
|
|
1309
|
-
echo ""
|
|
1310
|
-
echo " Common issues:"
|
|
1311
|
-
echo " • Image pull errors - check ECR permissions"
|
|
1312
|
-
echo " • Resource scheduling - insufficient GPU nodes"
|
|
1313
|
-
echo " • Container crash - check application logs"
|
|
1314
|
-
<% if (fsxVolumeHandle) { %>
|
|
1315
|
-
echo " • PVC binding errors - verify FSx CSI driver is installed on the cluster"
|
|
1316
|
-
echo " kubectl get pvc -n ${HYPERPOD_NAMESPACE}"
|
|
1317
|
-
echo " kubectl describe pvc -n ${HYPERPOD_NAMESPACE}"
|
|
1318
|
-
echo " kubectl get csidriver -o name | grep fsx"
|
|
1319
|
-
<% } %>
|
|
1320
|
-
exit ${ROLLOUT_EXIT_CODE}
|
|
1321
|
-
}
|
|
1322
|
-
|
|
1323
|
-
kill ${STATUS_PID} 2>/dev/null
|
|
1324
|
-
wait ${STATUS_PID} 2>/dev/null
|
|
1325
|
-
|
|
1326
|
-
echo "✅ HyperPod EKS deployment complete!"
|
|
1327
|
-
echo ""
|
|
1328
|
-
echo "📋 Deployment Details:"
|
|
1329
|
-
echo " Cluster: ${HYPERPOD_CLUSTER_NAME}"
|
|
1330
|
-
echo " Namespace: ${HYPERPOD_NAMESPACE}"
|
|
1331
|
-
echo " Deployment: ${PROJECT_NAME}"
|
|
1332
|
-
echo " Replicas: ${HYPERPOD_REPLICAS}"
|
|
1333
|
-
echo " Image: ${ECR_REPOSITORY}:${IMAGE_TAG}"
|
|
1334
|
-
echo ""
|
|
1335
|
-
echo "📋 What's next?"
|
|
1336
|
-
echo " • Test your deployment: ./do/test"
|
|
1337
|
-
echo " • Check pod status: kubectl get pods -n ${HYPERPOD_NAMESPACE}"
|
|
1338
|
-
echo " • View pod logs: kubectl logs -n ${HYPERPOD_NAMESPACE} -l app=${PROJECT_NAME}"
|
|
1339
|
-
<% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
|
|
1340
|
-
echo " • Benchmark performance: ./do/benchmark"
|
|
1341
|
-
<% } %>
|
|
1342
|
-
echo " • Register this deployment: ./do/register"
|
|
1343
|
-
echo " • View logs: ./do/logs"
|
|
1344
|
-
echo " • Clean up when done: ./do/clean hyperpod"
|
|
1345
|
-
|
|
1346
|
-
# Write kubeconfig path to config so other scripts can use it (idempotent)
|
|
1347
|
-
#######################################
# Set `export NAME="VALUE"` in the config file, idempotently.
# An existing `export NAME=` line is rewritten in place; otherwise a blank
# line followed by the new export line is appended.
# Globals:   SCRIPT_DIR (read) - directory that contains the `config` file
# Arguments: $1 - variable name
#            $2 - value to store (written between double quotes)
#######################################
_update_config_var() {
  local var_name="$1" var_value="$2" config_file="${SCRIPT_DIR}/config"
  local escaped_value

  if grep -q "^export ${var_name}=" "${config_file}" 2>/dev/null; then
    # The value is spliced into a sed replacement delimited by '|'. Escape the
    # delimiter, '&' (whole-match reference) and backslashes so that they are
    # written literally instead of being interpreted by sed.
    escaped_value=$(printf '%s' "${var_value}" | sed -e 's/[\\&|]/\\&/g')
    # -i.bak followed by rm works with both GNU and BSD sed.
    sed -i.bak "s|^export ${var_name}=.*|export ${var_name}=\"${escaped_value}\"|" "${config_file}"
    rm -f "${config_file}.bak"
  else
    # Blank separator line, then the new export.
    printf '\nexport %s="%s"\n' "${var_name}" "${var_value}" >> "${config_file}"
  fi
}
|
|
1357
|
-
|
|
1358
|
-
_update_config_var "KUBECONFIG" "${KUBECONFIG_PATH}"
|
|
1359
|
-
|
|
1360
|
-
<% } else if (deploymentTarget === 'batch-transform') { %>
|
|
1361
|
-
# ============================================================
|
|
1362
|
-
# SageMaker Batch Transform Deployment
|
|
1363
|
-
# Flow: create-model → create-transform-job → poll until completion
|
|
1364
|
-
# ============================================================
|
|
1365
|
-
|
|
1366
|
-
# Source shared helpers
|
|
1367
|
-
source "${SCRIPT_DIR}/lib/secrets.sh"
|
|
1368
|
-
source "${SCRIPT_DIR}/lib/wait.sh"
|
|
1369
|
-
|
|
1370
|
-
# Resolve container secrets (HF_TOKEN, NGC_API_KEY)
|
|
1371
|
-
resolve_secrets
|
|
1372
|
-
|
|
1373
|
-
# Validate execution role ARN
|
|
1374
|
-
if [ -z "${ROLE_ARN:-}" ]; then
|
|
1375
|
-
echo "❌ Execution role ARN not provided"
|
|
1376
|
-
echo ""
|
|
1377
|
-
echo "Usage:"
|
|
1378
|
-
echo " export ROLE_ARN=arn:aws:iam::ACCOUNT_ID:role/YOUR_ROLE"
|
|
1379
|
-
echo " ./do/deploy"
|
|
1380
|
-
echo ""
|
|
1381
|
-
echo "Or set ROLE_ARN in do/config"
|
|
1382
|
-
echo ""
|
|
1383
|
-
echo "The execution role must have permissions for:"
|
|
1384
|
-
echo " • SageMaker model and transform job management"
|
|
1385
|
-
echo " • ECR image access"
|
|
1386
|
-
echo " • S3 read access for input path: ${BATCH_INPUT_PATH}"
|
|
1387
|
-
echo " • S3 write access for output path: ${BATCH_OUTPUT_PATH}"
|
|
1388
|
-
echo " • CloudWatch Logs"
|
|
1389
|
-
exit 3
|
|
1390
|
-
fi
|
|
1391
|
-
|
|
1392
|
-
echo " Using execution role: ${ROLE_ARN}"
|
|
1393
|
-
|
|
1394
|
-
# Validate S3 input path
|
|
1395
|
-
if [ -z "${BATCH_INPUT_PATH:-}" ]; then
|
|
1396
|
-
echo "❌ S3 input path not provided"
|
|
1397
|
-
echo ""
|
|
1398
|
-
echo "Set BATCH_INPUT_PATH in do/config or provide via CLI:"
|
|
1399
|
-
echo " export BATCH_INPUT_PATH=s3://my-bucket/input/"
|
|
1400
|
-
echo " ./do/deploy"
|
|
1401
|
-
exit 3
|
|
1402
|
-
fi
|
|
1403
|
-
|
|
1404
|
-
if [[ "${BATCH_INPUT_PATH}" != s3://* ]]; then
|
|
1405
|
-
echo "❌ S3 input path must start with s3://"
|
|
1406
|
-
echo " Current value: ${BATCH_INPUT_PATH}"
|
|
1407
|
-
echo " Example: s3://my-bucket/input/"
|
|
1408
|
-
exit 3
|
|
1409
|
-
fi
|
|
1410
|
-
|
|
1411
|
-
# Validate S3 output path
|
|
1412
|
-
if [ -z "${BATCH_OUTPUT_PATH:-}" ]; then
|
|
1413
|
-
echo "❌ S3 output path not provided"
|
|
1414
|
-
echo ""
|
|
1415
|
-
echo "Set BATCH_OUTPUT_PATH in do/config or provide via CLI:"
|
|
1416
|
-
echo " export BATCH_OUTPUT_PATH=s3://my-bucket/output/"
|
|
1417
|
-
echo " ./do/deploy"
|
|
1418
|
-
exit 3
|
|
1419
|
-
fi
|
|
1420
|
-
|
|
1421
|
-
if [[ "${BATCH_OUTPUT_PATH}" != s3://* ]]; then
|
|
1422
|
-
echo "❌ S3 output path must start with s3://"
|
|
1423
|
-
echo " Current value: ${BATCH_OUTPUT_PATH}"
|
|
1424
|
-
echo " Example: s3://my-bucket/output/"
|
|
1425
|
-
exit 3
|
|
1426
|
-
fi
|
|
1427
|
-
|
|
1428
|
-
# ============================================================
|
|
1429
|
-
# Bootstrap S3 buckets for batch transform
|
|
1430
|
-
# ============================================================
|
|
1431
|
-
|
|
1432
|
-
# Extract bucket names from S3 paths

#######################################
# Print the bucket component of an s3://bucket/key URI.
# Arguments: $1 - S3 URI
# Outputs:   bucket name on stdout
#######################################
_s3_bucket_name() {
  # Strip the scheme only when it is a prefix, using shell expansion rather
  # than an unanchored sed plus cut.
  local uri_path="${1#s3://}"
  # Everything before the first '/' is the bucket.
  printf '%s\n' "${uri_path%%/*}"
}

BATCH_INPUT_BUCKET=$(_s3_bucket_name "${BATCH_INPUT_PATH}")
BATCH_OUTPUT_BUCKET=$(_s3_bucket_name "${BATCH_OUTPUT_PATH}")
|
|
1435
|
-
|
|
1436
|
-
<% if (!batchInputPath) { %>
|
|
1437
|
-
# Bootstrap default S3 input bucket (check-and-create)
|
|
1438
|
-
echo "🔍 Checking if S3 input bucket exists: ${BATCH_INPUT_BUCKET}"
|
|
1439
|
-
if ! aws s3api head-bucket --bucket "${BATCH_INPUT_BUCKET}" --region "${AWS_REGION}" 2>/dev/null; then
|
|
1440
|
-
echo "📦 Creating S3 input bucket: ${BATCH_INPUT_BUCKET}"
|
|
1441
|
-
if [ "${AWS_REGION}" = "us-east-1" ]; then
|
|
1442
|
-
if ! aws s3api create-bucket \
|
|
1443
|
-
--bucket "${BATCH_INPUT_BUCKET}" \
|
|
1444
|
-
--region "${AWS_REGION}"; then
|
|
1445
|
-
echo "❌ Failed to create S3 input bucket: ${BATCH_INPUT_BUCKET}"
|
|
1446
|
-
echo ""
|
|
1447
|
-
echo " Check that:"
|
|
1448
|
-
echo " • Your IAM credentials have s3:CreateBucket permission"
|
|
1449
|
-
echo " • The bucket name is not already taken globally"
|
|
1450
|
-
exit 4
|
|
1451
|
-
fi
|
|
1452
|
-
else
|
|
1453
|
-
if ! aws s3api create-bucket \
|
|
1454
|
-
--bucket "${BATCH_INPUT_BUCKET}" \
|
|
1455
|
-
--region "${AWS_REGION}" \
|
|
1456
|
-
--create-bucket-configuration LocationConstraint="${AWS_REGION}"; then
|
|
1457
|
-
echo "❌ Failed to create S3 input bucket: ${BATCH_INPUT_BUCKET}"
|
|
1458
|
-
echo ""
|
|
1459
|
-
echo " Check that:"
|
|
1460
|
-
echo " • Your IAM credentials have s3:CreateBucket permission"
|
|
1461
|
-
echo " • The bucket name is not already taken globally"
|
|
1462
|
-
exit 4
|
|
1463
|
-
fi
|
|
1464
|
-
fi
|
|
1465
|
-
echo "✅ S3 input bucket created: ${BATCH_INPUT_BUCKET}"
|
|
1466
|
-
else
|
|
1467
|
-
echo "✅ S3 input bucket exists: ${BATCH_INPUT_BUCKET}"
|
|
1468
|
-
fi
|
|
1469
|
-
|
|
1470
|
-
# Upload sample input file if the input prefix is empty
|
|
1471
|
-
EXISTING_OBJECTS=$(aws s3 ls "${BATCH_INPUT_PATH}" --region "${AWS_REGION}" 2>/dev/null | head -1 || true)
|
|
1472
|
-
if [ -z "${EXISTING_OBJECTS}" ]; then
|
|
1473
|
-
echo "📄 Uploading sample input file to ${BATCH_INPUT_PATH}"
|
|
1474
|
-
<% if (framework === 'transformers' && (modelServer === 'vllm' || modelServer === 'sglang')) { %>
|
|
1475
|
-
echo '{"model": "<%= modelName %>", "messages": [{"role": "user", "content": "What is machine learning?"}], "max_tokens": 50}' | aws s3 cp - "${BATCH_INPUT_PATH}sample.jsonl" --region "${AWS_REGION}"
|
|
1476
|
-
<% } else if (framework === 'transformers') { %>
|
|
1477
|
-
echo '{"inputs": "What is machine learning?", "parameters": {"max_new_tokens": 50}}' | aws s3 cp - "${BATCH_INPUT_PATH}sample.jsonl" --region "${AWS_REGION}"
|
|
1478
|
-
<% } else if (framework === 'diffusors') { %>
|
|
1479
|
-
echo '{"prompt": "A white cat", "n": 1, "size": "512x512"}' | aws s3 cp - "${BATCH_INPUT_PATH}sample.jsonl" --region "${AWS_REGION}"
|
|
1480
|
-
<% } else { %>
|
|
1481
|
-
echo '{"instances": [[1.0, 2.0, 3.0, 4.0]]}' | aws s3 cp - "${BATCH_INPUT_PATH}sample.jsonl" --region "${AWS_REGION}"
|
|
1482
|
-
<% } %>
|
|
1483
|
-
echo "✅ Sample input uploaded: ${BATCH_INPUT_PATH}sample.jsonl"
|
|
1484
|
-
echo " ⚠️ Replace this with your actual input data before running production jobs"
|
|
1485
|
-
fi
|
|
1486
|
-
<% } else { %>
|
|
1487
|
-
# Custom S3 input path provided — skip bucket creation
|
|
1488
|
-
echo "✅ Using custom S3 input path: ${BATCH_INPUT_PATH}"
|
|
1489
|
-
<% } %>
|
|
1490
|
-
|
|
1491
|
-
<% if (!batchOutputPath) { %>
|
|
1492
|
-
# Bootstrap default S3 output bucket (check-and-create, may be same as input)
|
|
1493
|
-
if [ "${BATCH_OUTPUT_BUCKET}" != "${BATCH_INPUT_BUCKET}" ]; then
|
|
1494
|
-
echo "🔍 Checking if S3 output bucket exists: ${BATCH_OUTPUT_BUCKET}"
|
|
1495
|
-
if ! aws s3api head-bucket --bucket "${BATCH_OUTPUT_BUCKET}" --region "${AWS_REGION}" 2>/dev/null; then
|
|
1496
|
-
echo "📦 Creating S3 output bucket: ${BATCH_OUTPUT_BUCKET}"
|
|
1497
|
-
if [ "${AWS_REGION}" = "us-east-1" ]; then
|
|
1498
|
-
if ! aws s3api create-bucket \
|
|
1499
|
-
--bucket "${BATCH_OUTPUT_BUCKET}" \
|
|
1500
|
-
--region "${AWS_REGION}"; then
|
|
1501
|
-
echo "❌ Failed to create S3 output bucket: ${BATCH_OUTPUT_BUCKET}"
|
|
1502
|
-
exit 4
|
|
1503
|
-
fi
|
|
1504
|
-
else
|
|
1505
|
-
if ! aws s3api create-bucket \
|
|
1506
|
-
--bucket "${BATCH_OUTPUT_BUCKET}" \
|
|
1507
|
-
--region "${AWS_REGION}" \
|
|
1508
|
-
--create-bucket-configuration LocationConstraint="${AWS_REGION}"; then
|
|
1509
|
-
echo "❌ Failed to create S3 output bucket: ${BATCH_OUTPUT_BUCKET}"
|
|
1510
|
-
exit 4
|
|
1511
|
-
fi
|
|
1512
|
-
fi
|
|
1513
|
-
echo "✅ S3 output bucket created: ${BATCH_OUTPUT_BUCKET}"
|
|
1514
|
-
else
|
|
1515
|
-
echo "✅ S3 output bucket exists: ${BATCH_OUTPUT_BUCKET}"
|
|
1516
|
-
fi
|
|
1517
|
-
else
|
|
1518
|
-
echo "✅ S3 output bucket same as input: ${BATCH_OUTPUT_BUCKET}"
|
|
1519
|
-
fi
|
|
1520
|
-
<% } else { %>
|
|
1521
|
-
# Custom S3 output path provided — skip bucket creation
|
|
1522
|
-
echo "✅ Using custom S3 output path: ${BATCH_OUTPUT_PATH}"
|
|
1523
|
-
<% } %>
|
|
1524
|
-
|
|
1525
|
-
# ============================================================
|
|
1526
|
-
# Check for previous transform job still running
|
|
1527
|
-
# ============================================================
|
|
1528
|
-
if [ "${FORCE_NEW}" != true ] && [ -n "${TRANSFORM_JOB_NAME:-}" ]; then
|
|
1529
|
-
echo "🔍 Checking previous transform job: ${TRANSFORM_JOB_NAME}"
|
|
1530
|
-
PREV_JOB_STATUS=$(aws sagemaker describe-transform-job \
|
|
1531
|
-
--transform-job-name "${TRANSFORM_JOB_NAME}" \
|
|
1532
|
-
--region "${AWS_REGION}" \
|
|
1533
|
-
--query "TransformJobStatus" \
|
|
1534
|
-
--output text 2>/dev/null || echo "")
|
|
1535
|
-
|
|
1536
|
-
case "${PREV_JOB_STATUS}" in
|
|
1537
|
-
InProgress)
|
|
1538
|
-
echo "⚠️ Previous transform job is still running: ${TRANSFORM_JOB_NAME}"
|
|
1539
|
-
echo " Wait for it to complete, or stop it with:"
|
|
1540
|
-
echo " aws sagemaker stop-transform-job --transform-job-name ${TRANSFORM_JOB_NAME} --region ${AWS_REGION}"
|
|
1541
|
-
echo ""
|
|
1542
|
-
echo " Use --force to create a new job anyway."
|
|
1543
|
-
exit 4
|
|
1544
|
-
;;
|
|
1545
|
-
Completed)
|
|
1546
|
-
echo "✅ Previous transform job completed: ${TRANSFORM_JOB_NAME}"
|
|
1547
|
-
echo " Creating a new job. Results from the previous job are in:"
|
|
1548
|
-
echo " ${BATCH_OUTPUT_PATH}"
|
|
1549
|
-
echo ""
|
|
1550
|
-
;;
|
|
1551
|
-
*)
|
|
1552
|
-
# Failed, Stopped, or not found — proceed with new job
|
|
1553
|
-
;;
|
|
1554
|
-
esac
|
|
1555
|
-
fi
|
|
1556
|
-
|
|
1557
|
-
# Generate unique names with timestamp
|
|
1558
|
-
TIMESTAMP=$(date +%s)
|
|
1559
|
-
MODEL_NAME_SM="${PROJECT_NAME}-batch-model-${TIMESTAMP}"
|
|
1560
|
-
TRANSFORM_JOB_NAME="${PROJECT_NAME}-batch-job-${TIMESTAMP}"
|
|
1561
|
-
|
|
1562
|
-
_update_config_var "TRANSFORM_JOB_NAME" "${TRANSFORM_JOB_NAME}"
|
|
1563
|
-
_update_config_var "SAGEMAKER_MODEL_NAME" "${MODEL_NAME_SM}"
|
|
1564
|
-
|
|
1565
|
-
# Step 1: Create SageMaker model
|
|
1566
|
-
echo "📦 Creating SageMaker model: ${MODEL_NAME_SM}"
|
|
1567
|
-
|
|
1568
|
-
# Build primary container spec
|
|
1569
|
-
BATCH_PRIMARY_CONTAINER="{\"Image\":\"${ECR_REPOSITORY}:${IMAGE_TAG}\""
|
|
1570
|
-
if [ -n "${CONTAINER_ENV_JSON}" ]; then
|
|
1571
|
-
BATCH_PRIMARY_CONTAINER="${BATCH_PRIMARY_CONTAINER},\"Environment\":{${CONTAINER_ENV_JSON}}"
|
|
1572
|
-
fi
|
|
1573
|
-
BATCH_PRIMARY_CONTAINER="${BATCH_PRIMARY_CONTAINER}}"
|
|
1574
|
-
|
|
1575
|
-
if ! aws sagemaker create-model \
|
|
1576
|
-
--model-name "${MODEL_NAME_SM}" \
|
|
1577
|
-
--primary-container "${BATCH_PRIMARY_CONTAINER}" \
|
|
1578
|
-
--execution-role-arn "${ROLE_ARN}" \
|
|
1579
|
-
--region "${AWS_REGION}"; then
|
|
1580
|
-
|
|
1581
|
-
echo "❌ Failed to create SageMaker model"
|
|
1582
|
-
echo " Check that:"
|
|
1583
|
-
echo " • The execution role ARN is valid"
|
|
1584
|
-
echo " • The ECR image exists and is accessible"
|
|
1585
|
-
echo " • The IAM role has ecr:GetDownloadUrlForLayer permission"
|
|
1586
|
-
exit 4
|
|
1587
|
-
fi
|
|
1588
|
-
|
|
1589
|
-
echo "✅ SageMaker model created: ${MODEL_NAME_SM}"
|
|
1590
|
-
|
|
1591
|
-
# Record model in manifest (non-blocking)
|
|
1592
|
-
MODEL_ARN="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:model/${MODEL_NAME_SM}"
|
|
1593
|
-
./do/manifest add \
|
|
1594
|
-
--type sagemaker-model \
|
|
1595
|
-
--id "${MODEL_ARN}" \
|
|
1596
|
-
--project "${PROJECT_NAME}" \
|
|
1597
|
-
--meta "{\"modelName\":\"${MODEL_NAME_SM}\",\"region\":\"${AWS_REGION}\"}" \
|
|
1598
|
-
2>/dev/null || true
|
|
1599
|
-
|
|
1600
|
-
# Step 2: Build transform job JSON
#
# The optional join fragments are prepared up front with a plain `if`.
# Embedding `$([ cond ] && echo ...)` in the assignment leaves the assignment
# with the status of the last command substitution, i.e. non-zero whenever the
# condition is false, which aborts the script if `set -e` is active.
JOIN_OUTPUT_JSON=""
JOIN_PROCESSING_JSON=""
if [ "${BATCH_JOIN_SOURCE:-None}" = "Input" ]; then
  # AssembleWith accepts only None or Line, whereas SplitType may also be
  # RecordIO or TFRecord, so the split type cannot be passed through as-is.
  if [ "${BATCH_SPLIT_TYPE}" = "Line" ]; then
    BATCH_ASSEMBLE_WITH="Line"
  else
    BATCH_ASSEMBLE_WITH="None"
  fi
  JOIN_OUTPUT_JSON=",\"Accept\": \"application/json\", \"AssembleWith\": \"${BATCH_ASSEMBLE_WITH}\""
  JOIN_PROCESSING_JSON=",\"DataProcessing\": { \"JoinSource\": \"Input\" }"
fi

TRANSFORM_JOB_JSON="{
  \"TransformJobName\": \"${TRANSFORM_JOB_NAME}\",
  \"ModelName\": \"${MODEL_NAME_SM}\",
  \"TransformInput\": {
    \"DataSource\": {
      \"S3DataSource\": {
        \"S3DataType\": \"S3Prefix\",
        \"S3Uri\": \"${BATCH_INPUT_PATH}\"
      }
    },
    \"ContentType\": \"application/json\",
    \"SplitType\": \"${BATCH_SPLIT_TYPE}\"
  },
  \"TransformOutput\": {
    \"S3OutputPath\": \"${BATCH_OUTPUT_PATH}\"${JOIN_OUTPUT_JSON}
  },
  \"TransformResources\": {
    \"InstanceType\": \"${INSTANCE_TYPE}\",
    \"InstanceCount\": ${BATCH_INSTANCE_COUNT}
  },
  \"MaxConcurrentTransforms\": ${BATCH_MAX_CONCURRENT_TRANSFORMS:-1},
  \"MaxPayloadInMB\": ${BATCH_MAX_PAYLOAD_IN_MB:-6},
  \"BatchStrategy\": \"${BATCH_STRATEGY}\"${JOIN_PROCESSING_JSON}
}"
|
|
1627
|
-
|
|
1628
|
-
# Step 3: Create transform job
|
|
1629
|
-
echo "🚀 Creating transform job: ${TRANSFORM_JOB_NAME}"
|
|
1630
|
-
if ! aws sagemaker create-transform-job \
|
|
1631
|
-
--cli-input-json "${TRANSFORM_JOB_JSON}" \
|
|
1632
|
-
--region "${AWS_REGION}"; then
|
|
1633
|
-
|
|
1634
|
-
echo "❌ Failed to create transform job"
|
|
1635
|
-
echo " Check that:"
|
|
1636
|
-
echo " • The S3 input path exists and is accessible: ${BATCH_INPUT_PATH}"
|
|
1637
|
-
echo " • The S3 output path is writable: ${BATCH_OUTPUT_PATH}"
|
|
1638
|
-
echo " • The IAM role has s3:GetObject permission on the input path"
|
|
1639
|
-
echo " • The IAM role has s3:PutObject permission on the output path"
|
|
1640
|
-
echo " • The instance type is valid: ${INSTANCE_TYPE}"
|
|
1641
|
-
echo " • The instance type is available in region: ${AWS_REGION}"
|
|
1642
|
-
echo " • You have sufficient service quota for the instance type"
|
|
1643
|
-
exit 4
|
|
1644
|
-
fi
|
|
1645
|
-
|
|
1646
|
-
echo "✅ Transform job created: ${TRANSFORM_JOB_NAME}"
|
|
1647
|
-
|
|
1648
|
-
# Record transform job in manifest (non-blocking)
|
|
1649
|
-
TRANSFORM_JOB_ARN="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:transform-job/${TRANSFORM_JOB_NAME}"
|
|
1650
|
-
./do/manifest add \
|
|
1651
|
-
--type sagemaker-transform-job \
|
|
1652
|
-
--id "${TRANSFORM_JOB_ARN}" \
|
|
1653
|
-
--project "${PROJECT_NAME}" \
|
|
1654
|
-
--meta "{\"transformJobName\":\"${TRANSFORM_JOB_NAME}\",\"modelName\":\"${MODEL_NAME_SM}\",\"instanceType\":\"${INSTANCE_TYPE}\",\"region\":\"${AWS_REGION}\"}" \
|
|
1655
|
-
2>/dev/null || true
|
|
1656
|
-
|
|
1657
|
-
# Step 4: Poll transform job status until completion or failure
|
|
1658
|
-
echo "⏳ Waiting for transform job to complete..."
|
|
1659
|
-
echo " This may take several minutes depending on dataset size..."
|
|
1660
|
-
echo " If this times out, check status with:"
|
|
1661
|
-
echo " aws sagemaker describe-transform-job --transform-job-name ${TRANSFORM_JOB_NAME} --region ${AWS_REGION}"
|
|
1662
|
-
echo ""
|
|
1663
|
-
|
|
1664
|
-
while true; do
|
|
1665
|
-
JOB_STATUS=$(aws sagemaker describe-transform-job \
|
|
1666
|
-
--transform-job-name "${TRANSFORM_JOB_NAME}" \
|
|
1667
|
-
--region "${AWS_REGION}" \
|
|
1668
|
-
--query "TransformJobStatus" \
|
|
1669
|
-
--output text 2>&1) || {
|
|
1670
|
-
# Check if it was a credential expiration
|
|
1671
|
-
if echo "${JOB_STATUS}" | grep -qi "expired\|token"; then
|
|
1672
|
-
echo ""
|
|
1673
|
-
echo "⚠️ Credentials expired, but the transform job is still running."
|
|
1674
|
-
echo " Refresh your credentials and check status with:"
|
|
1675
|
-
echo " aws sagemaker describe-transform-job --transform-job-name ${TRANSFORM_JOB_NAME} --region ${AWS_REGION} --query TransformJobStatus"
|
|
1676
|
-
exit 4
|
|
1677
|
-
fi
|
|
1678
|
-
echo "❌ Failed to describe transform job: ${TRANSFORM_JOB_NAME}"
|
|
1679
|
-
echo " Error: ${JOB_STATUS}"
|
|
1680
|
-
exit 4
|
|
1681
|
-
}
|
|
1682
|
-
|
|
1683
|
-
case "${JOB_STATUS}" in
|
|
1684
|
-
Completed)
|
|
1685
|
-
echo "✅ Transform job completed successfully!"
|
|
1686
|
-
break
|
|
1687
|
-
;;
|
|
1688
|
-
Failed)
|
|
1689
|
-
FAILURE_REASON=$(aws sagemaker describe-transform-job \
|
|
1690
|
-
--transform-job-name "${TRANSFORM_JOB_NAME}" \
|
|
1691
|
-
--region "${AWS_REGION}" \
|
|
1692
|
-
--query "FailureReason" \
|
|
1693
|
-
--output text 2>/dev/null || echo "Unknown")
|
|
1694
|
-
echo "❌ Transform job failed"
|
|
1695
|
-
echo " Reason: ${FAILURE_REASON}"
|
|
1696
|
-
echo ""
|
|
1697
|
-
echo " Check CloudWatch Logs for details:"
|
|
1698
|
-
echo " https://console.aws.amazon.com/cloudwatch/home?region=${AWS_REGION}#logsV2:log-groups/log-group//aws/sagemaker/TransformJobs"
|
|
1699
|
-
echo ""
|
|
1700
|
-
echo " Verify that:"
|
|
1701
|
-
echo " • The S3 input path exists and contains data: ${BATCH_INPUT_PATH}"
|
|
1702
|
-
echo " • The input data format matches the container's expected format"
|
|
1703
|
-
echo " • The container's /ping and /invocations endpoints work correctly"
|
|
1704
|
-
exit 4
|
|
1705
|
-
;;
|
|
1706
|
-
Stopped)
|
|
1707
|
-
echo "⚠️ Transform job was stopped"
|
|
1708
|
-
exit 4
|
|
1709
|
-
;;
|
|
1710
|
-
InProgress)
|
|
1711
|
-
echo " $(date +%H:%M:%S) Job status: InProgress..."
|
|
1712
|
-
sleep 30
|
|
1713
|
-
;;
|
|
1714
|
-
*)
|
|
1715
|
-
echo " $(date +%H:%M:%S) Job status: ${JOB_STATUS}..."
|
|
1716
|
-
sleep 30
|
|
1717
|
-
;;
|
|
1718
|
-
esac
|
|
1719
|
-
done
|
|
1720
|
-
|
|
1721
|
-
echo ""
|
|
1722
|
-
echo "📋 Deployment Details:"
|
|
1723
|
-
echo " Transform Job: ${TRANSFORM_JOB_NAME}"
|
|
1724
|
-
echo " Model: ${MODEL_NAME_SM}"
|
|
1725
|
-
echo " Region: ${AWS_REGION}"
|
|
1726
|
-
echo " Instance Type: ${INSTANCE_TYPE}"
|
|
1727
|
-
echo " Instance Count: ${BATCH_INSTANCE_COUNT}"
|
|
1728
|
-
echo " Image: ${ECR_REPOSITORY}:${IMAGE_TAG}"
|
|
1729
|
-
echo " S3 Input: ${BATCH_INPUT_PATH}"
|
|
1730
|
-
echo " S3 Output: ${BATCH_OUTPUT_PATH}"
|
|
1731
|
-
echo " Split Type: ${BATCH_SPLIT_TYPE}"
|
|
1732
|
-
echo " Strategy: ${BATCH_STRATEGY}"
|
|
1733
|
-
echo ""
|
|
1734
|
-
|
|
1735
|
-
# Download results locally
#
# Syncs BATCH_OUTPUT_PATH into <SCRIPT_DIR>/../batch-output, reports how many
# files are present there and previews the first five lines of the first file.
# A failed sync only produces a warning; it does not abort the script.
LOCAL_OUTPUT_DIR="${SCRIPT_DIR}/../batch-output"
mkdir -p "${LOCAL_OUTPUT_DIR}"
echo "📥 Downloading results to ${LOCAL_OUTPUT_DIR}/"
if aws s3 sync "${BATCH_OUTPUT_PATH}" "${LOCAL_OUTPUT_DIR}/" --region "${AWS_REGION}"; then
  # Count and select regular files with find instead of parsing `ls`: the sync
  # can create sub-directories for nested keys, which `ls` would count as
  # files and which `head`/`wc` cannot read.
  DOWNLOADED=$(find "${LOCAL_OUTPUT_DIR}" -type f | wc -l | tr -d ' ')
  echo "✅ Downloaded ${DOWNLOADED} file(s) to ${LOCAL_OUTPUT_DIR}/"
  echo ""

  # Display first output file preview
  FIRST_FILE=""
  while IFS= read -r output_file; do
    # Store the path relative to the output directory for display.
    FIRST_FILE="${output_file#"${LOCAL_OUTPUT_DIR}/"}"
    break
  done < <(find "${LOCAL_OUTPUT_DIR}" -type f | LC_ALL=C sort)

  if [ -n "${FIRST_FILE}" ]; then
    echo "📄 Sample output (${FIRST_FILE}):"
    head -5 "${LOCAL_OUTPUT_DIR}/${FIRST_FILE}"
    # Not named LINES: Bash maintains a variable of that name itself.
    OUTPUT_LINE_COUNT=$(wc -l < "${LOCAL_OUTPUT_DIR}/${FIRST_FILE}" | tr -d ' ')
    if [ "${OUTPUT_LINE_COUNT}" -gt 5 ]; then
      echo " ... (${OUTPUT_LINE_COUNT} total lines)"
    fi
  fi
else
  echo "⚠️ Could not download output files"
fi
|
|
1757
|
-
|
|
1758
|
-
echo ""
|
|
1759
|
-
echo "📋 What's next?"
|
|
1760
|
-
echo " • View results: cat batch-output/"
|
|
1761
|
-
echo " • Review results: ./do/test"
|
|
1762
|
-
echo " • Register this deployment: ./do/register"
|
|
1763
|
-
echo " • View logs: ./do/logs"
|
|
1764
|
-
echo " • Clean up when done: ./do/clean"
|
|
1765
|
-
|
|
1766
|
-
<% } %>
|
|
1
|
+
<%- include('deploy.d/' + (deploymentTarget === 'realtime-inference' ? 'managed-inference' : deploymentTarget)) %>
|