@aws/ml-container-creator 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +5 -2
- package/infra/ci-harness/buildspec.yml +60 -0
- package/package.json +1 -1
- package/servers/README.md +41 -1
- package/servers/instance-sizer/index.js +6 -0
- package/src/app.js +33 -2
- package/src/lib/config-manager.js +40 -1
- package/src/lib/deployment-entry-schema.js +16 -0
- package/src/lib/prompt-runner.js +174 -3
- package/src/lib/prompts.js +222 -2
- package/src/lib/registry-command-handler.js +12 -0
- package/templates/Dockerfile +12 -0
- package/templates/code/serving.properties +14 -0
- package/templates/do/adapter +1214 -0
- package/templates/do/adapters/.gitkeep +2 -0
- package/templates/do/add-ic +130 -0
- package/templates/do/benchmark +81 -9
- package/templates/do/clean +507 -17
- package/templates/do/config +23 -1
- package/templates/do/deploy +513 -367
- package/templates/do/ic/default.conf +32 -0
- package/templates/do/lib/endpoint-config.sh +216 -0
- package/templates/do/lib/inference-component.sh +167 -0
- package/templates/do/lib/secrets.sh +44 -0
- package/templates/do/lib/wait.sh +131 -0
- package/templates/do/logs +107 -27
- package/templates/do/optimize +528 -0
- package/templates/do/register +111 -1
- package/templates/do/status +337 -0
- package/templates/do/test +80 -28
package/templates/do/deploy
CHANGED
|
@@ -9,20 +9,59 @@ set -o pipefail
|
|
|
9
9
|
# Parse flags
|
|
10
10
|
FORCE_NEW=false
|
|
11
11
|
FORCE_IC=false
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
--force
|
|
12
|
+
IC_TARGET=""
|
|
13
|
+
while [ $# -gt 0 ]; do
|
|
14
|
+
case "$1" in
|
|
15
|
+
--force) FORCE_NEW=true; shift ;;
|
|
16
|
+
--force-ic)
|
|
17
|
+
FORCE_IC=true
|
|
18
|
+
shift
|
|
19
|
+
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
20
|
+
# Optional name argument: --force-ic <name>
|
|
21
|
+
if [ $# -gt 0 ] && [[ ! "$1" == --* ]]; then
|
|
22
|
+
IC_TARGET="$1"
|
|
23
|
+
shift
|
|
24
|
+
fi
|
|
25
|
+
<% } %>
|
|
26
|
+
;;
|
|
27
|
+
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
28
|
+
--ic)
|
|
29
|
+
if [ -z "${2:-}" ]; then
|
|
30
|
+
echo "❌ --ic requires a name argument"
|
|
31
|
+
echo " Usage: ./do/deploy --ic <name>"
|
|
32
|
+
exit 1
|
|
33
|
+
fi
|
|
34
|
+
IC_TARGET="$2"
|
|
35
|
+
shift 2
|
|
36
|
+
;;
|
|
37
|
+
<% } %>
|
|
16
38
|
--help|-h)
|
|
39
|
+
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
40
|
+
echo "Usage: ./do/deploy [--force] [--force-ic [<name>]] [--ic <name>]"
|
|
41
|
+
echo ""
|
|
42
|
+
echo "Options:"
|
|
43
|
+
echo " --force Create a new endpoint and IC, even if one already exists."
|
|
44
|
+
echo " --force-ic Recreate ALL inference components on the existing endpoint."
|
|
45
|
+
echo " --force-ic <name> Recreate only the named IC on the existing endpoint."
|
|
46
|
+
echo " --ic <name> Deploy only the named IC (from do/ic/<name>.conf)."
|
|
47
|
+
echo ""
|
|
48
|
+
echo "Without flags, deploy resumes from the last run."
|
|
49
|
+
<% } else { %>
|
|
17
50
|
echo "Usage: ./do/deploy [--force] [--force-ic]"
|
|
18
51
|
echo ""
|
|
19
52
|
echo "Options:"
|
|
20
|
-
echo " --force Create a new endpoint
|
|
21
|
-
echo " --force-ic Recreate
|
|
53
|
+
echo " --force Create a new endpoint, even if one already exists."
|
|
54
|
+
echo " --force-ic Recreate the inference component on the existing endpoint."
|
|
22
55
|
echo ""
|
|
23
56
|
echo "Without flags, deploy resumes from the last run."
|
|
57
|
+
<% } %>
|
|
24
58
|
exit 0
|
|
25
59
|
;;
|
|
60
|
+
*)
|
|
61
|
+
echo "❌ Unknown option: $1"
|
|
62
|
+
echo " Run ./do/deploy --help for usage."
|
|
63
|
+
exit 1
|
|
64
|
+
;;
|
|
26
65
|
esac
|
|
27
66
|
done
|
|
28
67
|
|
|
@@ -37,7 +76,11 @@ echo " Region: ${AWS_REGION}"
|
|
|
37
76
|
echo " Build target: ${BUILD_TARGET}"
|
|
38
77
|
echo " Deployment target: ${DEPLOYMENT_TARGET}"
|
|
39
78
|
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
40
|
-
|
|
79
|
+
if [ "${ENDPOINT_EXTERNAL:-false}" = "true" ]; then
|
|
80
|
+
echo " Endpoint: ${ENDPOINT_NAME} (external)"
|
|
81
|
+
else
|
|
82
|
+
echo " Instance type: ${INSTANCE_TYPE}"
|
|
83
|
+
fi
|
|
41
84
|
<% } else if (deploymentTarget === 'async-inference') { %>
|
|
42
85
|
echo " Instance type: ${INSTANCE_TYPE}"
|
|
43
86
|
echo " S3 output: ${ASYNC_S3_OUTPUT_PATH}"
|
|
@@ -135,6 +178,12 @@ fi
|
|
|
135
178
|
# SageMaker Real-Time Inference Deployment (Inference Components)
|
|
136
179
|
# ============================================================
|
|
137
180
|
|
|
181
|
+
# Source shared helpers
|
|
182
|
+
source "${SCRIPT_DIR}/lib/secrets.sh"
|
|
183
|
+
source "${SCRIPT_DIR}/lib/wait.sh"
|
|
184
|
+
source "${SCRIPT_DIR}/lib/endpoint-config.sh"
|
|
185
|
+
source "${SCRIPT_DIR}/lib/inference-component.sh"
|
|
186
|
+
|
|
138
187
|
# Validate execution role ARN
|
|
139
188
|
if [ -z "${ROLE_ARN:-}" ]; then
|
|
140
189
|
echo "❌ Execution role ARN not provided"
|
|
@@ -155,44 +204,30 @@ fi
|
|
|
155
204
|
|
|
156
205
|
echo " Using execution role: ${ROLE_ARN}"
|
|
157
206
|
|
|
158
|
-
#
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
echo "" >> "${config_file}"
|
|
166
|
-
echo "export ${var_name}=\"${var_value}\"" >> "${config_file}"
|
|
207
|
+
# Validate --ic argument if specified (set by --ic <name> or --force-ic <name>)
|
|
208
|
+
if [ -n "${IC_TARGET}" ]; then
|
|
209
|
+
if [ ! -d "${SCRIPT_DIR}/ic" ]; then
|
|
210
|
+
echo "❌ IC name specified but no do/ic/ directory found"
|
|
211
|
+
echo " This project does not use multi-IC configuration."
|
|
212
|
+
echo " Remove --ic/--force-ic <name> to deploy using the legacy single-IC path."
|
|
213
|
+
exit 1
|
|
167
214
|
fi
|
|
168
|
-
}
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
--inference-component-name "$1" \
|
|
182
|
-
--region "${AWS_REGION}" \
|
|
183
|
-
--query InferenceComponentStatus \
|
|
184
|
-
--output text 2>/dev/null || echo ""
|
|
185
|
-
}
|
|
215
|
+
if [ ! -f "${SCRIPT_DIR}/ic/${IC_TARGET}.conf" ]; then
|
|
216
|
+
echo "❌ IC config not found: do/ic/${IC_TARGET}.conf"
|
|
217
|
+
echo ""
|
|
218
|
+
echo " Available ICs:"
|
|
219
|
+
for conf in "${SCRIPT_DIR}"/ic/*.conf; do
|
|
220
|
+
[ -f "${conf}" ] || continue
|
|
221
|
+
echo " • $(basename "${conf}" .conf)"
|
|
222
|
+
done
|
|
223
|
+
echo ""
|
|
224
|
+
echo " Usage: ./do/deploy --ic <name>"
|
|
225
|
+
exit 1
|
|
226
|
+
fi
|
|
227
|
+
fi
|
|
186
228
|
|
|
187
|
-
#
|
|
188
|
-
|
|
189
|
-
aws sagemaker list-inference-components \
|
|
190
|
-
--endpoint-name "$1" \
|
|
191
|
-
--status-equals InService \
|
|
192
|
-
--region "${AWS_REGION}" \
|
|
193
|
-
--query 'InferenceComponents[0].InferenceComponentName' \
|
|
194
|
-
--output text 2>/dev/null || echo ""
|
|
195
|
-
}
|
|
229
|
+
# Resolve container secrets (HF_TOKEN, NGC_API_KEY)
|
|
230
|
+
resolve_secrets
|
|
196
231
|
|
|
197
232
|
# ============================================================
|
|
198
233
|
# Idempotency: check for existing deployment from a previous run
|
|
@@ -204,7 +239,11 @@ if [ "${FORCE_NEW}" = true ]; then
|
|
|
204
239
|
elif [ "${FORCE_IC}" = true ] && [ -n "${ENDPOINT_NAME:-}" ]; then
|
|
205
240
|
EP_STATUS=$(_get_endpoint_status "${ENDPOINT_NAME}")
|
|
206
241
|
if [ "${EP_STATUS}" = "InService" ]; then
|
|
207
|
-
|
|
242
|
+
if [ -n "${IC_TARGET}" ]; then
|
|
243
|
+
echo "🔄 --force-ic: recreating IC '${IC_TARGET}' on existing endpoint: ${ENDPOINT_NAME}"
|
|
244
|
+
else
|
|
245
|
+
echo "🔄 --force-ic: recreating ALL inference components on existing endpoint: ${ENDPOINT_NAME}"
|
|
246
|
+
fi
|
|
208
247
|
SKIP_TO="create_ic"
|
|
209
248
|
else
|
|
210
249
|
echo "⚠️ --force-ic requires an InService endpoint, but ${ENDPOINT_NAME} is: ${EP_STATUS:-not found}"
|
|
@@ -242,7 +281,7 @@ elif [ -n "${ENDPOINT_NAME:-}" ]; then
|
|
|
242
281
|
Creating)
|
|
243
282
|
echo "⏳ Inference component still creating: ${INFERENCE_COMPONENT_NAME}"
|
|
244
283
|
SKIP_TO="wait_ic"
|
|
245
|
-
|
|
284
|
+
IC_DEPLOYED_NAME="${INFERENCE_COMPONENT_NAME}"
|
|
246
285
|
;;
|
|
247
286
|
Failed)
|
|
248
287
|
echo "⚠️ Inference component failed: ${INFERENCE_COMPONENT_NAME}"
|
|
@@ -251,47 +290,59 @@ elif [ -n "${ENDPOINT_NAME:-}" ]; then
|
|
|
251
290
|
;;
|
|
252
291
|
*)
|
|
253
292
|
# Stored IC not found — check if a different IC is running on this endpoint
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
echo "
|
|
257
|
-
echo " (config had stale reference: ${INFERENCE_COMPONENT_NAME})"
|
|
258
|
-
_update_config_var "INFERENCE_COMPONENT_NAME" "${LIVE_IC}"
|
|
259
|
-
echo ""
|
|
260
|
-
echo "📋 Deployment is already live. Nothing to do."
|
|
261
|
-
echo " Endpoint: ${ENDPOINT_NAME}"
|
|
262
|
-
echo " Inference Component: ${LIVE_IC}"
|
|
263
|
-
echo ""
|
|
264
|
-
echo "🧪 Test your endpoint:"
|
|
265
|
-
echo " ./do/test"
|
|
266
|
-
echo ""
|
|
267
|
-
echo "🧹 Clean up when done:"
|
|
268
|
-
echo " ./do/clean endpoint"
|
|
269
|
-
exit 0
|
|
270
|
-
else
|
|
271
|
-
echo " No existing inference component found on endpoint. Will create one."
|
|
293
|
+
if [ "${ENDPOINT_EXTERNAL:-false}" = "true" ]; then
|
|
294
|
+
# External endpoint: never adopt ICs we didn't create
|
|
295
|
+
echo " Stored IC not found on external endpoint. Will create a new one."
|
|
272
296
|
SKIP_TO="create_ic"
|
|
297
|
+
else
|
|
298
|
+
LIVE_IC=$(_find_active_ic_on_endpoint "${ENDPOINT_NAME}")
|
|
299
|
+
if [ -n "${LIVE_IC}" ] && [ "${LIVE_IC}" != "None" ]; then
|
|
300
|
+
echo "✅ Found running inference component on endpoint: ${LIVE_IC}"
|
|
301
|
+
echo " (config had stale reference: ${INFERENCE_COMPONENT_NAME})"
|
|
302
|
+
_update_config_var "INFERENCE_COMPONENT_NAME" "${LIVE_IC}"
|
|
303
|
+
echo ""
|
|
304
|
+
echo "📋 Deployment is already live. Nothing to do."
|
|
305
|
+
echo " Endpoint: ${ENDPOINT_NAME}"
|
|
306
|
+
echo " Inference Component: ${LIVE_IC}"
|
|
307
|
+
echo ""
|
|
308
|
+
echo "🧪 Test your endpoint:"
|
|
309
|
+
echo " ./do/test"
|
|
310
|
+
echo ""
|
|
311
|
+
echo "🧹 Clean up when done:"
|
|
312
|
+
echo " ./do/clean endpoint"
|
|
313
|
+
exit 0
|
|
314
|
+
else
|
|
315
|
+
echo " No existing inference component found on endpoint. Will create one."
|
|
316
|
+
SKIP_TO="create_ic"
|
|
317
|
+
fi
|
|
273
318
|
fi
|
|
274
319
|
;;
|
|
275
320
|
esac
|
|
276
321
|
else
|
|
277
322
|
# No IC name in config — check if one is already running on the endpoint
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
echo "
|
|
281
|
-
_update_config_var "INFERENCE_COMPONENT_NAME" "${LIVE_IC}"
|
|
282
|
-
echo ""
|
|
283
|
-
echo "📋 Deployment is already live. Nothing to do."
|
|
284
|
-
echo " Endpoint: ${ENDPOINT_NAME}"
|
|
285
|
-
echo " Inference Component: ${LIVE_IC}"
|
|
286
|
-
echo ""
|
|
287
|
-
echo "🧪 Test your endpoint:"
|
|
288
|
-
echo " ./do/test"
|
|
289
|
-
echo ""
|
|
290
|
-
echo "🧹 Clean up when done:"
|
|
291
|
-
echo " ./do/clean endpoint"
|
|
292
|
-
exit 0
|
|
293
|
-
else
|
|
323
|
+
if [ "${ENDPOINT_EXTERNAL:-false}" = "true" ]; then
|
|
324
|
+
# External endpoint: never adopt ICs we didn't create
|
|
325
|
+
echo " No previous IC deployed by this project. Will create a new one."
|
|
294
326
|
SKIP_TO="create_ic"
|
|
327
|
+
else
|
|
328
|
+
LIVE_IC=$(_find_active_ic_on_endpoint "${ENDPOINT_NAME}")
|
|
329
|
+
if [ -n "${LIVE_IC}" ] && [ "${LIVE_IC}" != "None" ]; then
|
|
330
|
+
echo "✅ Found running inference component on endpoint: ${LIVE_IC}"
|
|
331
|
+
_update_config_var "INFERENCE_COMPONENT_NAME" "${LIVE_IC}"
|
|
332
|
+
echo ""
|
|
333
|
+
echo "📋 Deployment is already live. Nothing to do."
|
|
334
|
+
echo " Endpoint: ${ENDPOINT_NAME}"
|
|
335
|
+
echo " Inference Component: ${LIVE_IC}"
|
|
336
|
+
echo ""
|
|
337
|
+
echo "🧪 Test your endpoint:"
|
|
338
|
+
echo " ./do/test"
|
|
339
|
+
echo ""
|
|
340
|
+
echo "🧹 Clean up when done:"
|
|
341
|
+
echo " ./do/clean endpoint"
|
|
342
|
+
exit 0
|
|
343
|
+
else
|
|
344
|
+
SKIP_TO="create_ic"
|
|
345
|
+
fi
|
|
295
346
|
fi
|
|
296
347
|
fi
|
|
297
348
|
;;
|
|
@@ -316,252 +367,399 @@ elif [ -n "${ENDPOINT_NAME:-}" ]; then
|
|
|
316
367
|
fi
|
|
317
368
|
|
|
318
369
|
# ============================================================
|
|
319
|
-
# Step 1: Create endpoint configuration (skip if resuming)
|
|
370
|
+
# Step 1: Create endpoint configuration and endpoint (skip if resuming)
|
|
320
371
|
# ============================================================
|
|
321
372
|
if [ -z "${SKIP_TO}" ]; then
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
_update_config_var "ENDPOINT_NAME" "${ENDPOINT_NAME}"
|
|
328
|
-
_update_config_var "ENDPOINT_CONFIG_NAME" "${ENDPOINT_CONFIG_NAME}"
|
|
329
|
-
_update_config_var "INFERENCE_COMPONENT_NAME" "${IC_NAME}"
|
|
373
|
+
if [ "${ENDPOINT_EXTERNAL:-false}" = "true" ]; then
|
|
374
|
+
# External endpoint: validate it still exists and is InService
|
|
375
|
+
echo "🔗 Using external endpoint: ${ENDPOINT_NAME}"
|
|
376
|
+
echo " Validating endpoint status..."
|
|
330
377
|
|
|
331
|
-
|
|
332
|
-
VARIANT_JSON="[{\"VariantName\":\"AllTraffic\",\"InstanceType\":\"${INSTANCE_TYPE}\",\"InitialInstanceCount\":1"
|
|
378
|
+
EP_STATUS=$(_get_endpoint_status "${ENDPOINT_NAME}")
|
|
333
379
|
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
380
|
+
if [ -z "${EP_STATUS}" ]; then
|
|
381
|
+
echo "❌ External endpoint not found: ${ENDPOINT_NAME}"
|
|
382
|
+
echo " The endpoint may have been deleted. Update ENDPOINT_NAME in do/config"
|
|
383
|
+
echo " or remove ENDPOINT_EXTERNAL=true to create a new endpoint."
|
|
384
|
+
exit 4
|
|
385
|
+
fi
|
|
338
386
|
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
387
|
+
if [ "${EP_STATUS}" != "InService" ]; then
|
|
388
|
+
echo "❌ External endpoint not InService: ${ENDPOINT_NAME} (status: ${EP_STATUS})"
|
|
389
|
+
echo " The endpoint must be InService before attaching inference components."
|
|
390
|
+
echo " Wait for the endpoint to become InService, or update do/config."
|
|
391
|
+
exit 4
|
|
392
|
+
fi
|
|
343
393
|
|
|
344
|
-
|
|
394
|
+
echo "✅ External endpoint is InService: ${ENDPOINT_NAME}"
|
|
395
|
+
# Skip directly to IC creation — no endpoint config, no endpoint creation, no wait
|
|
396
|
+
SKIP_TO="create_ic"
|
|
397
|
+
else
|
|
398
|
+
TIMESTAMP=$(date +%s)
|
|
399
|
+
ENDPOINT_NAME="${PROJECT_NAME}-endpoint-${TIMESTAMP}"
|
|
345
400
|
|
|
346
|
-
|
|
347
|
-
if ! aws sagemaker create-endpoint-config \
|
|
348
|
-
--endpoint-config-name "${ENDPOINT_CONFIG_NAME}" \
|
|
349
|
-
--execution-role-arn "${ROLE_ARN}" \
|
|
350
|
-
--production-variants "${VARIANT_JSON}" \
|
|
351
|
-
--region "${AWS_REGION}"; then
|
|
401
|
+
_update_config_var "ENDPOINT_NAME" "${ENDPOINT_NAME}"
|
|
352
402
|
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
echo " • The execution role ARN is valid"
|
|
356
|
-
echo " • The instance type is valid: ${INSTANCE_TYPE}"
|
|
357
|
-
echo " • The instance type is available in region: ${AWS_REGION}"
|
|
358
|
-
echo " • You have sufficient service quota for the instance type"
|
|
359
|
-
exit 4
|
|
360
|
-
fi
|
|
403
|
+
# Create endpoint configuration via shared helper
|
|
404
|
+
create_endpoint_config
|
|
361
405
|
|
|
362
|
-
|
|
406
|
+
_update_config_var "ENDPOINT_CONFIG_NAME" "${ENDPOINT_CONFIG_NAME}"
|
|
363
407
|
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
408
|
+
# Record endpoint config in manifest (non-blocking)
|
|
409
|
+
ENDPOINT_CONFIG_ARN="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:endpoint-config/${ENDPOINT_CONFIG_NAME}"
|
|
410
|
+
./do/manifest add \
|
|
411
|
+
--type sagemaker-endpoint-config \
|
|
412
|
+
--id "${ENDPOINT_CONFIG_ARN}" \
|
|
413
|
+
--project "${PROJECT_NAME}" \
|
|
414
|
+
--meta "{\"endpointConfigName\":\"${ENDPOINT_CONFIG_NAME}\",\"instanceType\":\"${INSTANCE_TYPE}\",\"region\":\"${AWS_REGION}\"}" \
|
|
415
|
+
2>/dev/null || true
|
|
372
416
|
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
417
|
+
# Step 2: Create endpoint
|
|
418
|
+
echo "🚀 Creating endpoint: ${ENDPOINT_NAME}"
|
|
419
|
+
if ! aws sagemaker create-endpoint \
|
|
420
|
+
--endpoint-name "${ENDPOINT_NAME}" \
|
|
421
|
+
--endpoint-config-name "${ENDPOINT_CONFIG_NAME}" \
|
|
422
|
+
--region "${AWS_REGION}"; then
|
|
379
423
|
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
424
|
+
echo "❌ Failed to create endpoint"
|
|
425
|
+
echo " Check that:"
|
|
426
|
+
echo " • Your IAM credentials have sagemaker:CreateEndpoint permission"
|
|
427
|
+
echo " • You have sufficient service quota in region: ${AWS_REGION}"
|
|
428
|
+
exit 4
|
|
429
|
+
fi
|
|
386
430
|
|
|
387
|
-
|
|
431
|
+
echo "✅ Endpoint creation initiated: ${ENDPOINT_NAME}"
|
|
388
432
|
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
433
|
+
# Record endpoint in manifest (non-blocking)
|
|
434
|
+
ENDPOINT_ARN="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:endpoint/${ENDPOINT_NAME}"
|
|
435
|
+
./do/manifest add \
|
|
436
|
+
--type sagemaker-endpoint \
|
|
437
|
+
--id "${ENDPOINT_ARN}" \
|
|
438
|
+
--project "${PROJECT_NAME}" \
|
|
439
|
+
--meta "{\"endpointName\":\"${ENDPOINT_NAME}\",\"instanceType\":\"${INSTANCE_TYPE}\",\"region\":\"${AWS_REGION}\"}" \
|
|
440
|
+
2>/dev/null || true
|
|
441
|
+
fi
|
|
397
442
|
fi
|
|
398
443
|
|
|
399
444
|
# ============================================================
|
|
400
|
-
# Wait for endpoint (skip if already InService)
|
|
445
|
+
# Wait for endpoint (skip if already InService or external)
|
|
401
446
|
# ============================================================
|
|
402
447
|
if [ -z "${SKIP_TO}" ] || [ "${SKIP_TO}" = "wait_endpoint" ]; then
|
|
403
448
|
echo "⏳ Waiting for endpoint to reach InService status..."
|
|
404
449
|
echo " This may take a few minutes..."
|
|
405
450
|
echo " If this times out, re-run ./do/deploy to resume."
|
|
406
451
|
|
|
407
|
-
|
|
408
|
-
--endpoint-name "${ENDPOINT_NAME}" \
|
|
409
|
-
--region "${AWS_REGION}"; then
|
|
410
|
-
|
|
411
|
-
# Check if it was a credential expiration vs actual failure
|
|
412
|
-
EP_CHECK=$(_get_endpoint_status "${ENDPOINT_NAME}" 2>/dev/null)
|
|
413
|
-
if [ "${EP_CHECK}" = "Creating" ]; then
|
|
414
|
-
echo ""
|
|
415
|
-
echo "⚠️ Wait interrupted (credentials may have expired), but endpoint is still creating."
|
|
416
|
-
echo " Refresh your credentials and re-run ./do/deploy to resume."
|
|
417
|
-
echo ""
|
|
418
|
-
echo " Or check status manually:"
|
|
419
|
-
echo " aws sagemaker describe-endpoint --endpoint-name ${ENDPOINT_NAME} --region ${AWS_REGION} --query EndpointStatus"
|
|
420
|
-
exit 4
|
|
421
|
-
fi
|
|
422
|
-
|
|
423
|
-
echo "❌ Endpoint failed to reach InService status"
|
|
424
|
-
echo " Check CloudWatch Logs for details:"
|
|
425
|
-
echo " https://console.aws.amazon.com/cloudwatch/home?region=${AWS_REGION}#logsV2:log-groups/log-group//aws/sagemaker/Endpoints/${ENDPOINT_NAME}"
|
|
426
|
-
exit 4
|
|
427
|
-
fi
|
|
452
|
+
wait_endpoint "${ENDPOINT_NAME}"
|
|
428
453
|
|
|
429
454
|
echo "✅ Endpoint is InService: ${ENDPOINT_NAME}"
|
|
430
455
|
fi
|
|
431
456
|
|
|
432
457
|
# ============================================================
|
|
433
|
-
# Step 3:
|
|
458
|
+
# Step 3: Deploy inference components (skip if resuming from wait_ic)
|
|
434
459
|
# ============================================================
|
|
435
460
|
if [ -z "${SKIP_TO}" ] || [ "${SKIP_TO}" = "create_ic" ] || [ "${SKIP_TO}" = "wait_endpoint" ]; then
|
|
436
|
-
# Generate new IC name if resuming after endpoint wait or failed IC
|
|
437
|
-
if [ "${SKIP_TO}" = "create_ic" ] || [ "${SKIP_TO}" = "wait_endpoint" ]; then
|
|
438
|
-
TIMESTAMP=$(date +%s)
|
|
439
|
-
IC_NAME="${PROJECT_NAME}-ic-${TIMESTAMP}"
|
|
440
|
-
_update_config_var "INFERENCE_COMPONENT_NAME" "${IC_NAME}"
|
|
441
|
-
fi
|
|
442
461
|
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
462
|
+
if [ -d "${SCRIPT_DIR}/ic" ]; then
|
|
463
|
+
# _check_gpu_capacity
# Best-effort GPU capacity guardrail for multi-IC deployments.
#
# Sums IC_GPU_COUNT across every do/ic/*.conf file and compares the total with
# the GPU count hardcoded below for INSTANCE_TYPE. Only base ICs are counted:
# adapter ICs (do/adapters/*.conf) share the base IC's GPU resources and have
# no ComputeResourceRequirements, so they are intentionally excluded.
#
# Globals:
#   INSTANCE_TYPE (read) - check is skipped when unset/empty (external endpoints)
#   SCRIPT_DIR    (read) - directory that contains the ic/ config folder
# Outputs:
#   A warning on stdout when the requested total exceeds the instance's GPUs.
# Returns:
#   Always 0 - this guardrail warns but never blocks a deployment.
_check_gpu_capacity() {
  # Skip check if no INSTANCE_TYPE (external endpoints)
  if [ -z "${INSTANCE_TYPE:-}" ]; then
    return 0
  fi

  # Hardcoded GPU counts for common SageMaker GPU instance types.
  # NOTE(review): this table is hand-maintained; verify the counts (in
  # particular the ml.g7e.* family) against the AWS instance-type docs.
  local instance_gpus=""
  case "${INSTANCE_TYPE}" in
    ml.g4dn.xlarge)   instance_gpus=1 ;;
    ml.g4dn.12xlarge) instance_gpus=4 ;;
    ml.g5.xlarge)     instance_gpus=1 ;;
    ml.g5.2xlarge)    instance_gpus=1 ;;
    ml.g5.4xlarge)    instance_gpus=1 ;;
    ml.g5.8xlarge)    instance_gpus=1 ;;
    ml.g5.12xlarge)   instance_gpus=4 ;;
    ml.g5.48xlarge)   instance_gpus=8 ;;
    ml.g6.xlarge)     instance_gpus=1 ;;
    ml.g6.12xlarge)   instance_gpus=4 ;;
    ml.g6.48xlarge)   instance_gpus=8 ;;
    ml.g6e.xlarge)    instance_gpus=1 ;;
    ml.g6e.2xlarge)   instance_gpus=1 ;;
    ml.g6e.4xlarge)   instance_gpus=1 ;;
    ml.g6e.8xlarge)   instance_gpus=1 ;;
    ml.g6e.12xlarge)  instance_gpus=4 ;;
    ml.g6e.48xlarge)  instance_gpus=8 ;;
    ml.g7e.xlarge)    instance_gpus=1 ;;
    ml.g7e.2xlarge)   instance_gpus=1 ;;
    ml.g7e.4xlarge)   instance_gpus=1 ;;
    ml.g7e.8xlarge)   instance_gpus=1 ;;
    ml.g7e.12xlarge)  instance_gpus=4 ;;
    ml.g7e.48xlarge)  instance_gpus=8 ;;
    ml.p3.2xlarge)    instance_gpus=1 ;;
    ml.p3.8xlarge)    instance_gpus=4 ;;
    ml.p3.16xlarge)   instance_gpus=8 ;;
    ml.p4d.24xlarge)  instance_gpus=8 ;;
    ml.p4de.24xlarge) instance_gpus=8 ;;
    ml.p5.48xlarge)   instance_gpus=8 ;;
    *)                instance_gpus="" ;;
  esac

  # Skip check if instance type not in map
  if [ -z "${instance_gpus}" ]; then
    return 0
  fi

  # Sum IC_GPU_COUNT across all IC config files.
  # Only the leading integer of the value is used, so optional quotes, a
  # trailing comment, or a non-literal value (e.g. "${VAR:-1}") can never
  # reach the arithmetic below and abort the deploy. Missing or unparseable
  # values count as 1; when a file assigns the variable more than once the
  # last assignment wins.
  local gpu_count_re='^export IC_GPU_COUNT="?([0-9]+)'
  local total_gpu_requested=0
  local conf line ic_gpus
  for conf in "${SCRIPT_DIR}"/ic/*.conf; do
    # An unmatched glob stays literal; unreadable files are skipped too.
    if [ ! -f "${conf}" ] || [ ! -r "${conf}" ]; then
      continue
    fi

    ic_gpus=1
    while IFS= read -r line || [ -n "${line}" ]; do
      if [[ "${line}" =~ ${gpu_count_re} ]]; then
        ic_gpus="${BASH_REMATCH[1]}"
      fi
    done < "${conf}"

    # 10# forces base 10 so values such as "08" are not parsed as octal.
    total_gpu_requested=$(( total_gpu_requested + 10#${ic_gpus} ))
  done

  if [ "${total_gpu_requested}" -gt "${instance_gpus}" ]; then
    echo ""
    echo "⚠️ GPU capacity warning: ICs request ${total_gpu_requested} GPUs total, but ${INSTANCE_TYPE} has ${instance_gpus} GPUs."
    echo " SageMaker will likely reject IC creation if capacity is exceeded."
    echo " Consider reducing IC_GPU_COUNT values or using a larger instance type."
    echo ""
  fi

  return 0
}
|
|
477
539
|
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
540
|
+
# Run capacity guardrail before deploying ICs
|
|
541
|
+
_check_gpu_capacity
|
|
542
|
+
|
|
543
|
+
# _delete_and_wait_ic <ic_name> [timeout_seconds] [poll_seconds]
# Deletes an inference component and waits for deletion to complete.
# Polls until the IC is no longer found (avoids name conflicts on recreate).
#
# Arguments:
#   $1 - inference component name
#   $2 - optional max seconds to wait for deletion (default: 600)
#   $3 - optional seconds between status polls (default: 15)
# Globals:
#   AWS_REGION (read)
# Relies on:
#   _get_ic_status <name> - expected to print the IC status, or nothing when
#   the IC cannot be found (defined outside this block).
# Returns:
#   Always 0. Deletion is best-effort: a failed delete call or a timeout is
#   reported as a warning and the caller proceeds.
_delete_and_wait_ic() {
  local ic_name="$1"
  local delete_timeout="${2:-600}" # 10 minutes max wait for deletion
  local poll_interval="${3:-15}"
  local ic_status elapsed delete_start

  echo "🗑️ Deleting inference component: ${ic_name}"
  if ! aws sagemaker delete-inference-component \
    --inference-component-name "${ic_name}" \
    --region "${AWS_REGION}" 2>/dev/null; then
    # The delete call can fail for several reasons; ask what is actually
    # there instead of assuming the IC is gone.
    ic_status=$(_get_ic_status "${ic_name}")
    if [ -z "${ic_status}" ]; then
      echo " ⚠️ Delete call failed (IC may already be gone). Continuing..."
      return 0
    fi
    if [ "${ic_status}" != "Deleting" ]; then
      echo " ⚠️ Delete call failed and the IC still exists (status: ${ic_status}). Continuing..."
      return 0
    fi
    # Status is Deleting: a deletion is already in flight, so fall through
    # and wait for it to finish like a deletion we started ourselves.
  fi

  echo " Waiting for deletion to complete..."
  delete_start=$(date +%s)

  while true; do
    ic_status=$(_get_ic_status "${ic_name}")

    # Empty status means the IC can no longer be described, i.e. it is gone.
    if [ -z "${ic_status}" ]; then
      echo " ✅ Inference component deleted: ${ic_name}"
      break
    fi

    elapsed=$(( $(date +%s) - delete_start ))
    if [ "${elapsed}" -ge "${delete_timeout}" ]; then
      echo " ⚠️ Deletion timed out after ${delete_timeout}s. IC status: ${ic_status}"
      echo " Proceeding anyway — SageMaker may reject the new IC if name conflicts."
      break
    fi

    echo " $(date +%H:%M:%S) Deleting... (${ic_status}, ${elapsed}s elapsed)"
    sleep "${poll_interval}"
  done

  return 0
}
|
|
582
|
+
|
|
583
|
+
# _deploy_single_ic <conf_file>
# Deploys a single IC with per-IC idempotency:
# - If FORCE_IC is true: delete existing IC, clear state, create fresh
# - If IC_DEPLOYED_NAME is set and InService → skip
# - If IC_DEPLOYED_NAME is set and Creating → wait for it, then record it
# - If IC_DEPLOYED_NAME is set and Failed → recreate with new timestamp
# - If IC_DEPLOYED_NAME is not set → create new IC
#
# Arguments:
#   $1 - path to the IC config file (do/ic/<name>.conf)
# Globals:
#   FORCE_IC, AWS_REGION, AWS_ACCOUNT_ID, PROJECT_NAME, ENDPOINT_NAME,
#   INSTANCE_TYPE (read); IC_DEPLOYED_NAME (written here for the skip/resume
#   paths; otherwise expected to be set by create_inference_component).
# Relies on helpers defined outside this block: _get_ic_status,
#   _delete_and_wait_ic, _update_config_var, create_inference_component,
#   wait_ic.
# Fail-fast: exits immediately on failure.
_deploy_single_ic() {
  local ic_conf="$1"
  local ic_basename
  ic_basename=$(basename "${ic_conf}" .conf)

  # Read IC_DEPLOYED_NAME straight from the file instead of sourcing it, so
  # the IC config does not pollute this shell's scope. Quotes are optional
  # and anything after the name (trailing quote, comment) is ignored; when
  # the variable is assigned more than once the last assignment wins.
  local existing_ic_name="" conf_line
  local deployed_name_re='^export IC_DEPLOYED_NAME="?([A-Za-z0-9-]*)'
  if [ -r "${ic_conf}" ]; then
    while IFS= read -r conf_line || [ -n "${conf_line}" ]; do
      if [[ "${conf_line}" =~ ${deployed_name_re} ]]; then
        existing_ic_name="${BASH_REMATCH[1]}"
      fi
    done < "${ic_conf}"
  fi

  # --force-ic: delete existing IC before recreating
  if [ "${FORCE_IC}" = true ] && [ -n "${existing_ic_name}" ]; then
    echo "🔄 --force-ic: recreating IC '${ic_basename}'"
    _delete_and_wait_ic "${existing_ic_name}"

    # Clear deployed state from config before recreating
    _update_config_var "IC_DEPLOYED_NAME" "" "${ic_conf}"
    _update_config_var "IC_DEPLOYED_AT" "" "${ic_conf}"
    existing_ic_name=""
  fi

  # The resume-from-Creating path announces its own wait below.
  local announce_wait=true

  if [ -n "${existing_ic_name}" ]; then
    # IC was previously deployed — check its current status
    local ic_status
    ic_status=$(_get_ic_status "${existing_ic_name}")

    case "${ic_status}" in
      InService)
        echo "✅ IC '${ic_basename}' already InService: ${existing_ic_name} — skipping"
        IC_DEPLOYED_NAME="${existing_ic_name}"
        return 0
        ;;
      Creating)
        # A previous run created this IC but stopped before it went
        # InService, so it was never written to the manifest. Wait for it
        # and then fall through to the manifest step.
        echo "⏳ IC '${ic_basename}' is still Creating: ${existing_ic_name} — waiting..."
        IC_DEPLOYED_NAME="${existing_ic_name}"
        announce_wait=false
        ;;
      Failed)
        echo "⚠️ IC '${ic_basename}' previously Failed: ${existing_ic_name} — recreating..."
        create_inference_component "${ic_conf}"
        ;;
      *)
        echo " IC '${ic_basename}' has unknown/missing status for ${existing_ic_name} — creating new..."
        create_inference_component "${ic_conf}"
        ;;
    esac
  else
    # No previous deployment (or state just cleared by --force-ic) — create new IC
    create_inference_component "${ic_conf}"
  fi

  if [ "${announce_wait}" = true ]; then
    echo "⏳ Waiting for inference component to reach InService status..."
    echo " This may take 5-10 minutes..."
  fi

  wait_ic "${IC_DEPLOYED_NAME}"

  echo "✅ Inference component is InService: ${IC_DEPLOYED_NAME}"

  # Record inference component in manifest (non-blocking)
  local ic_arn="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${IC_DEPLOYED_NAME}"
  ./do/manifest add \
    --type sagemaker-inference-component \
    --id "${ic_arn}" \
    --project "${PROJECT_NAME}" \
    --meta "{\"inferenceComponentName\":\"${IC_DEPLOYED_NAME}\",\"endpointName\":\"${ENDPOINT_NAME}\",\"instanceType\":\"${INSTANCE_TYPE:-external}\",\"region\":\"${AWS_REGION}\"}" \
    2>/dev/null || true
}
|
|
665
|
+
|
|
666
|
+
if [ -n "${IC_TARGET}" ]; then
|
|
667
|
+
# Single IC path: deploy only the named IC
|
|
511
668
|
echo ""
|
|
512
|
-
echo "
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
echo " Re-run ./do/deploy to resume waiting."
|
|
669
|
+
echo "── Deploying IC: ${IC_TARGET} ──"
|
|
670
|
+
_deploy_single_ic "${SCRIPT_DIR}/ic/${IC_TARGET}.conf"
|
|
671
|
+
else
|
|
672
|
+
# Multi-IC path: iterate all IC config files (alphabetical order)
|
|
673
|
+
IC_SUMMARY=""
|
|
674
|
+
IC_DEPLOY_FAILED=false
|
|
675
|
+
|
|
676
|
+
for conf in "${SCRIPT_DIR}"/ic/*.conf; do
|
|
677
|
+
[ -f "${conf}" ] || continue
|
|
678
|
+
local_ic_basename=$(basename "${conf}" .conf)
|
|
523
679
|
echo ""
|
|
524
|
-
echo "
|
|
525
|
-
|
|
680
|
+
echo "── Deploying IC: ${local_ic_basename} ──"
|
|
681
|
+
|
|
682
|
+
if ! _deploy_single_ic "${conf}"; then
|
|
683
|
+
echo "❌ IC '${local_ic_basename}' failed to deploy. Stopping."
|
|
684
|
+
IC_SUMMARY="${IC_SUMMARY} ${local_ic_basename}: FAILED\n"
|
|
685
|
+
IC_DEPLOY_FAILED=true
|
|
686
|
+
break
|
|
687
|
+
fi
|
|
688
|
+
|
|
689
|
+
IC_SUMMARY="${IC_SUMMARY} ${local_ic_basename}: ${IC_DEPLOYED_NAME} [InService]\n"
|
|
690
|
+
done
|
|
691
|
+
|
|
692
|
+
# Print summary
|
|
693
|
+
echo ""
|
|
694
|
+
echo "📋 IC Deployment Summary:"
|
|
695
|
+
echo -e "${IC_SUMMARY}"
|
|
696
|
+
|
|
697
|
+
if [ "${IC_DEPLOY_FAILED}" = true ]; then
|
|
698
|
+
echo "❌ Deployment stopped due to IC failure. Fix the issue and re-run ./do/deploy to resume."
|
|
526
699
|
exit 4
|
|
527
700
|
fi
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
701
|
+
fi
|
|
702
|
+
else
|
|
703
|
+
# Legacy single-IC path: no do/ic/ directory
|
|
704
|
+
create_inference_component_legacy
|
|
705
|
+
|
|
706
|
+
echo "⏳ Waiting for inference component to reach InService status..."
|
|
707
|
+
echo " This may take 5-10 minutes..."
|
|
708
|
+
echo " If this times out, re-run ./do/deploy to resume."
|
|
709
|
+
|
|
710
|
+
wait_ic "${IC_DEPLOYED_NAME}"
|
|
711
|
+
|
|
712
|
+
echo "✅ Inference component is InService: ${IC_DEPLOYED_NAME}"
|
|
713
|
+
|
|
714
|
+
# Record inference component in manifest (non-blocking)
|
|
715
|
+
IC_ARN="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${IC_DEPLOYED_NAME}"
|
|
716
|
+
./do/manifest add \
|
|
717
|
+
--type sagemaker-inference-component \
|
|
718
|
+
--id "${IC_ARN}" \
|
|
719
|
+
--project "${PROJECT_NAME}" \
|
|
720
|
+
--meta "{\"inferenceComponentName\":\"${IC_DEPLOYED_NAME}\",\"endpointName\":\"${ENDPOINT_NAME}\",\"instanceType\":\"${INSTANCE_TYPE:-external}\",\"region\":\"${AWS_REGION}\"}" \
|
|
721
|
+
2>/dev/null || true
|
|
722
|
+
fi
|
|
723
|
+
|
|
724
|
+
elif [ "${SKIP_TO}" = "wait_ic" ]; then
|
|
725
|
+
# Resuming: just wait for the IC that was already being created
|
|
726
|
+
echo "⏳ Waiting for inference component to reach InService status..."
|
|
727
|
+
echo " This may take 5-10 minutes..."
|
|
728
|
+
echo " If this times out, re-run ./do/deploy to resume."
|
|
729
|
+
|
|
730
|
+
wait_ic "${IC_DEPLOYED_NAME}"
|
|
731
|
+
|
|
732
|
+
echo "✅ Inference component is InService: ${IC_DEPLOYED_NAME}"
|
|
733
|
+
fi
|
|
542
734
|
|
|
543
735
|
echo "✅ Deployment complete!"
|
|
544
736
|
echo ""
|
|
545
737
|
echo "📋 Deployment Details:"
|
|
546
738
|
echo " Endpoint: ${ENDPOINT_NAME}"
|
|
547
|
-
|
|
548
|
-
echo "
|
|
549
|
-
echo " Region: ${AWS_REGION}"
|
|
550
|
-
|
|
739
|
+
if [ "${ENDPOINT_EXTERNAL:-false}" = "true" ]; then
|
|
740
|
+
echo " Endpoint Config: (external — not managed by this project)"
|
|
741
|
+
echo " Region: ${AWS_REGION}"
|
|
742
|
+
else
|
|
743
|
+
echo " Endpoint Config: ${ENDPOINT_CONFIG_NAME:-N/A}"
|
|
744
|
+
echo " Region: ${AWS_REGION}"
|
|
745
|
+
echo " Instance Type: ${INSTANCE_TYPE}"
|
|
746
|
+
fi
|
|
551
747
|
echo " Image: ${ECR_REPOSITORY}:${IMAGE_TAG}"
|
|
552
748
|
echo ""
|
|
553
|
-
echo "
|
|
554
|
-
echo " ./do/test"
|
|
555
|
-
|
|
556
|
-
echo "
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
echo "
|
|
560
|
-
|
|
561
|
-
echo "
|
|
562
|
-
echo ""
|
|
563
|
-
echo "
|
|
564
|
-
|
|
749
|
+
echo "📋 What's next?"
|
|
750
|
+
echo " • Test your endpoint: ./do/test"
|
|
751
|
+
<% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
|
|
752
|
+
echo " • Benchmark performance: ./do/benchmark"
|
|
753
|
+
<% } %>
|
|
754
|
+
<% if (typeof enableLora !== 'undefined' && enableLora) { %>
|
|
755
|
+
echo " • Add a LoRA adapter: ./do/adapter add <name> --weights s3://..."
|
|
756
|
+
<% } %>
|
|
757
|
+
echo " • View endpoint status: ./do/status"
|
|
758
|
+
echo " • Register this deployment: ./do/register"
|
|
759
|
+
echo " • View logs: ./do/logs"
|
|
760
|
+
<% if (!(typeof existingEndpointName !== 'undefined' && existingEndpointName)) { %>
|
|
761
|
+
echo " • Clean up when done: ./do/clean endpoint"
|
|
762
|
+
<% } %>
|
|
565
763
|
|
|
566
764
|
<% } else if (deploymentTarget === 'async-inference') { %>
|
|
567
765
|
# ============================================================
|
|
@@ -570,6 +768,13 @@ echo " ./do/clean endpoint"
|
|
|
570
768
|
# Flow: create-model → create-endpoint-config (with AsyncInferenceConfig) → create-endpoint
|
|
571
769
|
# ============================================================
|
|
572
770
|
|
|
771
|
+
# Source shared helpers
|
|
772
|
+
source "${SCRIPT_DIR}/lib/secrets.sh"
|
|
773
|
+
source "${SCRIPT_DIR}/lib/wait.sh"
|
|
774
|
+
|
|
775
|
+
# Resolve container secrets (HF_TOKEN, NGC_API_KEY)
|
|
776
|
+
resolve_secrets
|
|
777
|
+
|
|
573
778
|
# Validate execution role ARN
|
|
574
779
|
if [ -z "${ROLE_ARN:-}" ]; then
|
|
575
780
|
echo "❌ Execution role ARN not provided"
|
|
@@ -732,27 +937,6 @@ ASYNC_SNS_ERROR_TOPIC_NAME=$(echo "${ASYNC_SNS_ERROR_TOPIC}" | awk -F: '{print $
|
|
|
732
937
|
# Flow: create-model → create-endpoint-config (with AsyncInferenceConfig) → create-endpoint
|
|
733
938
|
# ============================================================
|
|
734
939
|
|
|
735
|
-
# Helper: persist a variable to do/config so other scripts can use it
|
|
736
|
-
_update_config_var() {
|
|
737
|
-
local var_name="$1" var_value="$2" config_file="${SCRIPT_DIR}/config"
|
|
738
|
-
if grep -q "^export ${var_name}=" "${config_file}" 2>/dev/null; then
|
|
739
|
-
sed -i.bak "s|^export ${var_name}=.*|export ${var_name}=\"${var_value}\"|" "${config_file}"
|
|
740
|
-
rm -f "${config_file}.bak"
|
|
741
|
-
else
|
|
742
|
-
echo "" >> "${config_file}"
|
|
743
|
-
echo "export ${var_name}=\"${var_value}\"" >> "${config_file}"
|
|
744
|
-
fi
|
|
745
|
-
}
|
|
746
|
-
|
|
747
|
-
# Helper: query a SageMaker resource status, returns empty string if not found
|
|
748
|
-
_get_endpoint_status() {
|
|
749
|
-
aws sagemaker describe-endpoint \
|
|
750
|
-
--endpoint-name "$1" \
|
|
751
|
-
--region "${AWS_REGION}" \
|
|
752
|
-
--query EndpointStatus \
|
|
753
|
-
--output text 2>/dev/null || echo ""
|
|
754
|
-
}
|
|
755
|
-
|
|
756
940
|
# ============================================================
|
|
757
941
|
# Idempotency: check for existing deployment from a previous run
|
|
758
942
|
# ============================================================
|
|
@@ -928,27 +1112,7 @@ if [ -z "${SKIP_TO}" ] || [ "${SKIP_TO}" = "wait_endpoint" ]; then
|
|
|
928
1112
|
echo " This may take several minutes..."
|
|
929
1113
|
echo " If this times out, re-run ./do/deploy to resume."
|
|
930
1114
|
|
|
931
|
-
|
|
932
|
-
--endpoint-name "${ENDPOINT_NAME}" \
|
|
933
|
-
--region "${AWS_REGION}"; then
|
|
934
|
-
|
|
935
|
-
# Check if it was a credential expiration vs actual failure
|
|
936
|
-
EP_CHECK=$(_get_endpoint_status "${ENDPOINT_NAME}" 2>/dev/null)
|
|
937
|
-
if [ "${EP_CHECK}" = "Creating" ]; then
|
|
938
|
-
echo ""
|
|
939
|
-
echo "⚠️ Wait interrupted (credentials may have expired), but endpoint is still creating."
|
|
940
|
-
echo " Refresh your credentials and re-run ./do/deploy to resume."
|
|
941
|
-
echo ""
|
|
942
|
-
echo " Or check status manually:"
|
|
943
|
-
echo " aws sagemaker describe-endpoint --endpoint-name ${ENDPOINT_NAME} --region ${AWS_REGION} --query EndpointStatus"
|
|
944
|
-
exit 4
|
|
945
|
-
fi
|
|
946
|
-
|
|
947
|
-
echo "❌ Async endpoint failed to reach InService status"
|
|
948
|
-
echo " Check CloudWatch Logs for details:"
|
|
949
|
-
echo " https://console.aws.amazon.com/cloudwatch/home?region=${AWS_REGION}#logsV2:log-groups/log-group//aws/sagemaker/Endpoints/${ENDPOINT_NAME}"
|
|
950
|
-
exit 4
|
|
951
|
-
fi
|
|
1115
|
+
wait_endpoint "${ENDPOINT_NAME}"
|
|
952
1116
|
fi
|
|
953
1117
|
|
|
954
1118
|
echo "✅ Async deployment complete!"
|
|
@@ -964,17 +1128,15 @@ echo " S3 Output: ${ASYNC_S3_OUTPUT_PATH}"
|
|
|
964
1128
|
echo " SNS Success: ${ASYNC_SNS_SUCCESS_TOPIC}"
|
|
965
1129
|
echo " SNS Error: ${ASYNC_SNS_ERROR_TOPIC}"
|
|
966
1130
|
echo ""
|
|
967
|
-
echo "
|
|
968
|
-
echo " ./do/test"
|
|
969
|
-
echo ""
|
|
970
|
-
|
|
971
|
-
echo " ./do/
|
|
972
|
-
|
|
973
|
-
echo "
|
|
974
|
-
echo "
|
|
975
|
-
echo ""
|
|
976
|
-
echo "🧹 Clean up when done:"
|
|
977
|
-
echo " ./do/clean endpoint"
|
|
1131
|
+
echo "📋 What's next?"
|
|
1132
|
+
echo " • Test your async endpoint: ./do/test"
|
|
1133
|
+
echo " • Check async output: aws s3 ls ${ASYNC_S3_OUTPUT_PATH}"
|
|
1134
|
+
<% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
|
|
1135
|
+
echo " • Benchmark performance: ./do/benchmark"
|
|
1136
|
+
<% } %>
|
|
1137
|
+
echo " • Register this deployment: ./do/register"
|
|
1138
|
+
echo " • View logs: ./do/logs"
|
|
1139
|
+
echo " • Clean up when done: ./do/clean endpoint"
|
|
978
1140
|
|
|
979
1141
|
<% } else if (deploymentTarget === 'hyperpod-eks') { %>
|
|
980
1142
|
# ============================================================
|
|
@@ -1170,22 +1332,16 @@ echo " Deployment: ${PROJECT_NAME}"
|
|
|
1170
1332
|
echo " Replicas: ${HYPERPOD_REPLICAS}"
|
|
1171
1333
|
echo " Image: ${ECR_REPOSITORY}:${IMAGE_TAG}"
|
|
1172
1334
|
echo ""
|
|
1173
|
-
echo "
|
|
1174
|
-
echo "
|
|
1175
|
-
echo " kubectl get pods -n ${HYPERPOD_NAMESPACE}"
|
|
1176
|
-
echo " kubectl
|
|
1177
|
-
|
|
1178
|
-
echo "
|
|
1179
|
-
|
|
1180
|
-
echo ""
|
|
1181
|
-
echo "
|
|
1182
|
-
echo " ./do/
|
|
1183
|
-
echo ""
|
|
1184
|
-
echo "📋 View logs:"
|
|
1185
|
-
echo " ./do/logs"
|
|
1186
|
-
echo ""
|
|
1187
|
-
echo "🧹 Clean up when done:"
|
|
1188
|
-
echo " ./do/clean hyperpod"
|
|
1335
|
+
echo "📋 What's next?"
|
|
1336
|
+
echo " • Test your deployment: ./do/test"
|
|
1337
|
+
echo " • Check pod status: kubectl get pods -n ${HYPERPOD_NAMESPACE}"
|
|
1338
|
+
echo " • View pod logs: kubectl logs -n ${HYPERPOD_NAMESPACE} -l app=${PROJECT_NAME}"
|
|
1339
|
+
<% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
|
|
1340
|
+
echo " • Benchmark performance: ./do/benchmark"
|
|
1341
|
+
<% } %>
|
|
1342
|
+
echo " • Register this deployment: ./do/register"
|
|
1343
|
+
echo " • View logs: ./do/logs"
|
|
1344
|
+
echo " • Clean up when done: ./do/clean hyperpod"
|
|
1189
1345
|
|
|
1190
1346
|
# Write kubeconfig path to config so other scripts can use it (idempotent)
|
|
1191
1347
|
_update_config_var() {
|
|
@@ -1207,6 +1363,13 @@ _update_config_var "KUBECONFIG" "${KUBECONFIG_PATH}"
|
|
|
1207
1363
|
# Flow: create-model → create-transform-job → poll until completion
|
|
1208
1364
|
# ============================================================
|
|
1209
1365
|
|
|
1366
|
+
# Source shared helpers
|
|
1367
|
+
source "${SCRIPT_DIR}/lib/secrets.sh"
|
|
1368
|
+
source "${SCRIPT_DIR}/lib/wait.sh"
|
|
1369
|
+
|
|
1370
|
+
# Resolve container secrets (HF_TOKEN, NGC_API_KEY)
|
|
1371
|
+
resolve_secrets
|
|
1372
|
+
|
|
1210
1373
|
# Validate execution role ARN
|
|
1211
1374
|
if [ -z "${ROLE_ARN:-}" ]; then
|
|
1212
1375
|
echo "❌ Execution role ARN not provided"
|
|
@@ -1359,18 +1522,6 @@ fi
|
|
|
1359
1522
|
echo "✅ Using custom S3 output path: ${BATCH_OUTPUT_PATH}"
|
|
1360
1523
|
<% } %>
|
|
1361
1524
|
|
|
1362
|
-
# Helper: persist a variable to do/config so other scripts can use it
|
|
1363
|
-
_update_config_var() {
|
|
1364
|
-
local var_name="$1" var_value="$2" config_file="${SCRIPT_DIR}/config"
|
|
1365
|
-
if grep -q "^export ${var_name}=" "${config_file}" 2>/dev/null; then
|
|
1366
|
-
sed -i.bak "s|^export ${var_name}=.*|export ${var_name}=\"${var_value}\"|" "${config_file}"
|
|
1367
|
-
rm -f "${config_file}.bak"
|
|
1368
|
-
else
|
|
1369
|
-
echo "" >> "${config_file}"
|
|
1370
|
-
echo "export ${var_name}=\"${var_value}\"" >> "${config_file}"
|
|
1371
|
-
fi
|
|
1372
|
-
}
|
|
1373
|
-
|
|
1374
1525
|
# ============================================================
|
|
1375
1526
|
# Check for previous transform job still running
|
|
1376
1527
|
# ============================================================
|
|
@@ -1605,16 +1756,11 @@ else
|
|
|
1605
1756
|
fi
|
|
1606
1757
|
|
|
1607
1758
|
echo ""
|
|
1608
|
-
echo "
|
|
1609
|
-
echo "
|
|
1610
|
-
echo ""
|
|
1611
|
-
echo "
|
|
1612
|
-
echo " ./do/
|
|
1613
|
-
echo ""
|
|
1614
|
-
echo "📋 View logs:"
|
|
1615
|
-
echo " ./do/logs"
|
|
1616
|
-
echo ""
|
|
1617
|
-
echo "🧹 Clean up when done:"
|
|
1618
|
-
echo " ./do/clean"
|
|
1759
|
+
echo "📋 What's next?"
|
|
1760
|
+
echo " • View results: cat batch-output/"
|
|
1761
|
+
echo " • Review results: ./do/test"
|
|
1762
|
+
echo " • Register this deployment: ./do/register"
|
|
1763
|
+
echo " • View logs: ./do/logs"
|
|
1764
|
+
echo " • Clean up when done: ./do/clean"
|
|
1619
1765
|
|
|
1620
1766
|
<% } %>
|