@aws/ml-container-creator 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +5 -2
- package/config/bootstrap-stack.json +86 -7
- package/config/defaults.json +1 -1
- package/infra/ci-harness/buildspec.yml +60 -0
- package/package.json +3 -1
- package/servers/README.md +41 -1
- package/servers/instance-sizer/index.js +42 -2
- package/servers/instance-sizer/lib/instance-ranker.js +114 -10
- package/servers/instance-sizer/lib/quota-resolver.js +368 -0
- package/servers/instance-sizer/package.json +2 -0
- package/servers/lib/catalogs/instances.json +527 -12
- package/servers/lib/catalogs/model-servers.json +15 -15
- package/servers/lib/catalogs/model-sizes.json +27 -0
- package/servers/lib/catalogs/models.json +71 -0
- package/servers/lib/schemas/image-catalog.schema.json +9 -1
- package/src/app.js +109 -3
- package/src/lib/bootstrap-command-handler.js +96 -3
- package/src/lib/cli-handler.js +2 -2
- package/src/lib/config-manager.js +117 -1
- package/src/lib/deployment-entry-schema.js +16 -0
- package/src/lib/prompt-runner.js +270 -12
- package/src/lib/prompts.js +288 -6
- package/src/lib/registry-command-handler.js +12 -0
- package/src/lib/schema-sync.js +31 -0
- package/src/lib/template-manager.js +49 -1
- package/src/lib/validate-runner.js +125 -2
- package/templates/Dockerfile +22 -2
- package/templates/code/cuda_compat.sh +22 -0
- package/templates/code/serve +3 -0
- package/templates/code/serving.properties +14 -0
- package/templates/code/start_server.sh +3 -0
- package/templates/diffusors/Dockerfile +2 -1
- package/templates/diffusors/serve +3 -0
- package/templates/do/README.md +33 -0
- package/templates/do/adapter +1214 -0
- package/templates/do/adapters/.gitkeep +2 -0
- package/templates/do/add-ic +130 -0
- package/templates/do/benchmark +718 -0
- package/templates/do/clean +593 -17
- package/templates/do/config +49 -4
- package/templates/do/deploy +513 -362
- package/templates/do/ic/default.conf +32 -0
- package/templates/do/lib/endpoint-config.sh +216 -0
- package/templates/do/lib/inference-component.sh +167 -0
- package/templates/do/lib/secrets.sh +44 -0
- package/templates/do/lib/wait.sh +131 -0
- package/templates/do/logs +107 -27
- package/templates/do/optimize +528 -0
- package/templates/do/register +119 -2
- package/templates/do/status +337 -0
- package/templates/do/test +80 -28
- package/templates/triton/Dockerfile +5 -0
package/templates/do/deploy
CHANGED
|
@@ -9,20 +9,59 @@ set -o pipefail
|
|
|
9
9
|
# Parse flags
|
|
10
10
|
FORCE_NEW=false
|
|
11
11
|
FORCE_IC=false
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
--force
|
|
12
|
+
IC_TARGET=""
|
|
13
|
+
while [ $# -gt 0 ]; do
|
|
14
|
+
case "$1" in
|
|
15
|
+
--force) FORCE_NEW=true; shift ;;
|
|
16
|
+
--force-ic)
|
|
17
|
+
FORCE_IC=true
|
|
18
|
+
shift
|
|
19
|
+
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
20
|
+
# Optional name argument: --force-ic <name>
|
|
21
|
+
if [ $# -gt 0 ] && [[ ! "$1" == --* ]]; then
|
|
22
|
+
IC_TARGET="$1"
|
|
23
|
+
shift
|
|
24
|
+
fi
|
|
25
|
+
<% } %>
|
|
26
|
+
;;
|
|
27
|
+
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
28
|
+
--ic)
|
|
29
|
+
if [ -z "${2:-}" ]; then
|
|
30
|
+
echo "❌ --ic requires a name argument"
|
|
31
|
+
echo " Usage: ./do/deploy --ic <name>"
|
|
32
|
+
exit 1
|
|
33
|
+
fi
|
|
34
|
+
IC_TARGET="$2"
|
|
35
|
+
shift 2
|
|
36
|
+
;;
|
|
37
|
+
<% } %>
|
|
16
38
|
--help|-h)
|
|
39
|
+
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
40
|
+
echo "Usage: ./do/deploy [--force] [--force-ic [<name>]] [--ic <name>]"
|
|
41
|
+
echo ""
|
|
42
|
+
echo "Options:"
|
|
43
|
+
echo " --force Create a new endpoint and IC, even if one already exists."
|
|
44
|
+
echo " --force-ic Recreate ALL inference components on the existing endpoint."
|
|
45
|
+
echo " --force-ic <name> Recreate only the named IC on the existing endpoint."
|
|
46
|
+
echo " --ic <name> Deploy only the named IC (from do/ic/<name>.conf)."
|
|
47
|
+
echo ""
|
|
48
|
+
echo "Without flags, deploy resumes from the last run."
|
|
49
|
+
<% } else { %>
|
|
17
50
|
echo "Usage: ./do/deploy [--force] [--force-ic]"
|
|
18
51
|
echo ""
|
|
19
52
|
echo "Options:"
|
|
20
|
-
echo " --force Create a new endpoint
|
|
21
|
-
echo " --force-ic Recreate
|
|
53
|
+
echo " --force Create a new endpoint, even if one already exists."
|
|
54
|
+
echo " --force-ic Recreate the inference component on the existing endpoint."
|
|
22
55
|
echo ""
|
|
23
56
|
echo "Without flags, deploy resumes from the last run."
|
|
57
|
+
<% } %>
|
|
24
58
|
exit 0
|
|
25
59
|
;;
|
|
60
|
+
*)
|
|
61
|
+
echo "❌ Unknown option: $1"
|
|
62
|
+
echo " Run ./do/deploy --help for usage."
|
|
63
|
+
exit 1
|
|
64
|
+
;;
|
|
26
65
|
esac
|
|
27
66
|
done
|
|
28
67
|
|
|
@@ -37,7 +76,11 @@ echo " Region: ${AWS_REGION}"
|
|
|
37
76
|
echo " Build target: ${BUILD_TARGET}"
|
|
38
77
|
echo " Deployment target: ${DEPLOYMENT_TARGET}"
|
|
39
78
|
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
40
|
-
|
|
79
|
+
if [ "${ENDPOINT_EXTERNAL:-false}" = "true" ]; then
|
|
80
|
+
echo " Endpoint: ${ENDPOINT_NAME} (external)"
|
|
81
|
+
else
|
|
82
|
+
echo " Instance type: ${INSTANCE_TYPE}"
|
|
83
|
+
fi
|
|
41
84
|
<% } else if (deploymentTarget === 'async-inference') { %>
|
|
42
85
|
echo " Instance type: ${INSTANCE_TYPE}"
|
|
43
86
|
echo " S3 output: ${ASYNC_S3_OUTPUT_PATH}"
|
|
@@ -135,6 +178,12 @@ fi
|
|
|
135
178
|
# SageMaker Real-Time Inference Deployment (Inference Components)
|
|
136
179
|
# ============================================================
|
|
137
180
|
|
|
181
|
+
# Source shared helpers
|
|
182
|
+
source "${SCRIPT_DIR}/lib/secrets.sh"
|
|
183
|
+
source "${SCRIPT_DIR}/lib/wait.sh"
|
|
184
|
+
source "${SCRIPT_DIR}/lib/endpoint-config.sh"
|
|
185
|
+
source "${SCRIPT_DIR}/lib/inference-component.sh"
|
|
186
|
+
|
|
138
187
|
# Validate execution role ARN
|
|
139
188
|
if [ -z "${ROLE_ARN:-}" ]; then
|
|
140
189
|
echo "❌ Execution role ARN not provided"
|
|
@@ -155,44 +204,30 @@ fi
|
|
|
155
204
|
|
|
156
205
|
echo " Using execution role: ${ROLE_ARN}"
|
|
157
206
|
|
|
158
|
-
#
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
echo "" >> "${config_file}"
|
|
166
|
-
echo "export ${var_name}=\"${var_value}\"" >> "${config_file}"
|
|
207
|
+
# Validate --ic argument if specified (set by --ic <name> or --force-ic <name>)
|
|
208
|
+
if [ -n "${IC_TARGET}" ]; then
|
|
209
|
+
if [ ! -d "${SCRIPT_DIR}/ic" ]; then
|
|
210
|
+
echo "❌ IC name specified but no do/ic/ directory found"
|
|
211
|
+
echo " This project does not use multi-IC configuration."
|
|
212
|
+
echo " Remove --ic/--force-ic <name> to deploy using the legacy single-IC path."
|
|
213
|
+
exit 1
|
|
167
214
|
fi
|
|
168
|
-
}
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
--inference-component-name "$1" \
|
|
182
|
-
--region "${AWS_REGION}" \
|
|
183
|
-
--query InferenceComponentStatus \
|
|
184
|
-
--output text 2>/dev/null || echo ""
|
|
185
|
-
}
|
|
215
|
+
if [ ! -f "${SCRIPT_DIR}/ic/${IC_TARGET}.conf" ]; then
|
|
216
|
+
echo "❌ IC config not found: do/ic/${IC_TARGET}.conf"
|
|
217
|
+
echo ""
|
|
218
|
+
echo " Available ICs:"
|
|
219
|
+
for conf in "${SCRIPT_DIR}"/ic/*.conf; do
|
|
220
|
+
[ -f "${conf}" ] || continue
|
|
221
|
+
echo " • $(basename "${conf}" .conf)"
|
|
222
|
+
done
|
|
223
|
+
echo ""
|
|
224
|
+
echo " Usage: ./do/deploy --ic <name>"
|
|
225
|
+
exit 1
|
|
226
|
+
fi
|
|
227
|
+
fi
|
|
186
228
|
|
|
187
|
-
#
|
|
188
|
-
|
|
189
|
-
aws sagemaker list-inference-components \
|
|
190
|
-
--endpoint-name "$1" \
|
|
191
|
-
--status-equals InService \
|
|
192
|
-
--region "${AWS_REGION}" \
|
|
193
|
-
--query 'InferenceComponents[0].InferenceComponentName' \
|
|
194
|
-
--output text 2>/dev/null || echo ""
|
|
195
|
-
}
|
|
229
|
+
# Resolve container secrets (HF_TOKEN, NGC_API_KEY)
|
|
230
|
+
resolve_secrets
|
|
196
231
|
|
|
197
232
|
# ============================================================
|
|
198
233
|
# Idempotency: check for existing deployment from a previous run
|
|
@@ -204,7 +239,11 @@ if [ "${FORCE_NEW}" = true ]; then
|
|
|
204
239
|
elif [ "${FORCE_IC}" = true ] && [ -n "${ENDPOINT_NAME:-}" ]; then
|
|
205
240
|
EP_STATUS=$(_get_endpoint_status "${ENDPOINT_NAME}")
|
|
206
241
|
if [ "${EP_STATUS}" = "InService" ]; then
|
|
207
|
-
|
|
242
|
+
if [ -n "${IC_TARGET}" ]; then
|
|
243
|
+
echo "🔄 --force-ic: recreating IC '${IC_TARGET}' on existing endpoint: ${ENDPOINT_NAME}"
|
|
244
|
+
else
|
|
245
|
+
echo "🔄 --force-ic: recreating ALL inference components on existing endpoint: ${ENDPOINT_NAME}"
|
|
246
|
+
fi
|
|
208
247
|
SKIP_TO="create_ic"
|
|
209
248
|
else
|
|
210
249
|
echo "⚠️ --force-ic requires an InService endpoint, but ${ENDPOINT_NAME} is: ${EP_STATUS:-not found}"
|
|
@@ -242,7 +281,7 @@ elif [ -n "${ENDPOINT_NAME:-}" ]; then
|
|
|
242
281
|
Creating)
|
|
243
282
|
echo "⏳ Inference component still creating: ${INFERENCE_COMPONENT_NAME}"
|
|
244
283
|
SKIP_TO="wait_ic"
|
|
245
|
-
|
|
284
|
+
IC_DEPLOYED_NAME="${INFERENCE_COMPONENT_NAME}"
|
|
246
285
|
;;
|
|
247
286
|
Failed)
|
|
248
287
|
echo "⚠️ Inference component failed: ${INFERENCE_COMPONENT_NAME}"
|
|
@@ -251,47 +290,59 @@ elif [ -n "${ENDPOINT_NAME:-}" ]; then
|
|
|
251
290
|
;;
|
|
252
291
|
*)
|
|
253
292
|
# Stored IC not found — check if a different IC is running on this endpoint
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
echo "
|
|
257
|
-
echo " (config had stale reference: ${INFERENCE_COMPONENT_NAME})"
|
|
258
|
-
_update_config_var "INFERENCE_COMPONENT_NAME" "${LIVE_IC}"
|
|
259
|
-
echo ""
|
|
260
|
-
echo "📋 Deployment is already live. Nothing to do."
|
|
261
|
-
echo " Endpoint: ${ENDPOINT_NAME}"
|
|
262
|
-
echo " Inference Component: ${LIVE_IC}"
|
|
263
|
-
echo ""
|
|
264
|
-
echo "🧪 Test your endpoint:"
|
|
265
|
-
echo " ./do/test"
|
|
266
|
-
echo ""
|
|
267
|
-
echo "🧹 Clean up when done:"
|
|
268
|
-
echo " ./do/clean endpoint"
|
|
269
|
-
exit 0
|
|
270
|
-
else
|
|
271
|
-
echo " No existing inference component found on endpoint. Will create one."
|
|
293
|
+
if [ "${ENDPOINT_EXTERNAL:-false}" = "true" ]; then
|
|
294
|
+
# External endpoint: never adopt ICs we didn't create
|
|
295
|
+
echo " Stored IC not found on external endpoint. Will create a new one."
|
|
272
296
|
SKIP_TO="create_ic"
|
|
297
|
+
else
|
|
298
|
+
LIVE_IC=$(_find_active_ic_on_endpoint "${ENDPOINT_NAME}")
|
|
299
|
+
if [ -n "${LIVE_IC}" ] && [ "${LIVE_IC}" != "None" ]; then
|
|
300
|
+
echo "✅ Found running inference component on endpoint: ${LIVE_IC}"
|
|
301
|
+
echo " (config had stale reference: ${INFERENCE_COMPONENT_NAME})"
|
|
302
|
+
_update_config_var "INFERENCE_COMPONENT_NAME" "${LIVE_IC}"
|
|
303
|
+
echo ""
|
|
304
|
+
echo "📋 Deployment is already live. Nothing to do."
|
|
305
|
+
echo " Endpoint: ${ENDPOINT_NAME}"
|
|
306
|
+
echo " Inference Component: ${LIVE_IC}"
|
|
307
|
+
echo ""
|
|
308
|
+
echo "🧪 Test your endpoint:"
|
|
309
|
+
echo " ./do/test"
|
|
310
|
+
echo ""
|
|
311
|
+
echo "🧹 Clean up when done:"
|
|
312
|
+
echo " ./do/clean endpoint"
|
|
313
|
+
exit 0
|
|
314
|
+
else
|
|
315
|
+
echo " No existing inference component found on endpoint. Will create one."
|
|
316
|
+
SKIP_TO="create_ic"
|
|
317
|
+
fi
|
|
273
318
|
fi
|
|
274
319
|
;;
|
|
275
320
|
esac
|
|
276
321
|
else
|
|
277
322
|
# No IC name in config — check if one is already running on the endpoint
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
echo "
|
|
281
|
-
_update_config_var "INFERENCE_COMPONENT_NAME" "${LIVE_IC}"
|
|
282
|
-
echo ""
|
|
283
|
-
echo "📋 Deployment is already live. Nothing to do."
|
|
284
|
-
echo " Endpoint: ${ENDPOINT_NAME}"
|
|
285
|
-
echo " Inference Component: ${LIVE_IC}"
|
|
286
|
-
echo ""
|
|
287
|
-
echo "🧪 Test your endpoint:"
|
|
288
|
-
echo " ./do/test"
|
|
289
|
-
echo ""
|
|
290
|
-
echo "🧹 Clean up when done:"
|
|
291
|
-
echo " ./do/clean endpoint"
|
|
292
|
-
exit 0
|
|
293
|
-
else
|
|
323
|
+
if [ "${ENDPOINT_EXTERNAL:-false}" = "true" ]; then
|
|
324
|
+
# External endpoint: never adopt ICs we didn't create
|
|
325
|
+
echo " No previous IC deployed by this project. Will create a new one."
|
|
294
326
|
SKIP_TO="create_ic"
|
|
327
|
+
else
|
|
328
|
+
LIVE_IC=$(_find_active_ic_on_endpoint "${ENDPOINT_NAME}")
|
|
329
|
+
if [ -n "${LIVE_IC}" ] && [ "${LIVE_IC}" != "None" ]; then
|
|
330
|
+
echo "✅ Found running inference component on endpoint: ${LIVE_IC}"
|
|
331
|
+
_update_config_var "INFERENCE_COMPONENT_NAME" "${LIVE_IC}"
|
|
332
|
+
echo ""
|
|
333
|
+
echo "📋 Deployment is already live. Nothing to do."
|
|
334
|
+
echo " Endpoint: ${ENDPOINT_NAME}"
|
|
335
|
+
echo " Inference Component: ${LIVE_IC}"
|
|
336
|
+
echo ""
|
|
337
|
+
echo "🧪 Test your endpoint:"
|
|
338
|
+
echo " ./do/test"
|
|
339
|
+
echo ""
|
|
340
|
+
echo "🧹 Clean up when done:"
|
|
341
|
+
echo " ./do/clean endpoint"
|
|
342
|
+
exit 0
|
|
343
|
+
else
|
|
344
|
+
SKIP_TO="create_ic"
|
|
345
|
+
fi
|
|
295
346
|
fi
|
|
296
347
|
fi
|
|
297
348
|
;;
|
|
@@ -316,247 +367,399 @@ elif [ -n "${ENDPOINT_NAME:-}" ]; then
|
|
|
316
367
|
fi
|
|
317
368
|
|
|
318
369
|
# ============================================================
|
|
319
|
-
# Step 1: Create endpoint configuration (skip if resuming)
|
|
370
|
+
# Step 1: Create endpoint configuration and endpoint (skip if resuming)
|
|
320
371
|
# ============================================================
|
|
321
372
|
if [ -z "${SKIP_TO}" ]; then
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
373
|
+
if [ "${ENDPOINT_EXTERNAL:-false}" = "true" ]; then
|
|
374
|
+
# External endpoint: validate it still exists and is InService
|
|
375
|
+
echo "🔗 Using external endpoint: ${ENDPOINT_NAME}"
|
|
376
|
+
echo " Validating endpoint status..."
|
|
326
377
|
|
|
327
|
-
|
|
328
|
-
_update_config_var "ENDPOINT_CONFIG_NAME" "${ENDPOINT_CONFIG_NAME}"
|
|
329
|
-
_update_config_var "INFERENCE_COMPONENT_NAME" "${IC_NAME}"
|
|
378
|
+
EP_STATUS=$(_get_endpoint_status "${ENDPOINT_NAME}")
|
|
330
379
|
|
|
331
|
-
|
|
332
|
-
|
|
380
|
+
if [ -z "${EP_STATUS}" ]; then
|
|
381
|
+
echo "❌ External endpoint not found: ${ENDPOINT_NAME}"
|
|
382
|
+
echo " The endpoint may have been deleted. Update ENDPOINT_NAME in do/config"
|
|
383
|
+
echo " or remove ENDPOINT_EXTERNAL=true to create a new endpoint."
|
|
384
|
+
exit 4
|
|
385
|
+
fi
|
|
333
386
|
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
387
|
+
if [ "${EP_STATUS}" != "InService" ]; then
|
|
388
|
+
echo "❌ External endpoint not InService: ${ENDPOINT_NAME} (status: ${EP_STATUS})"
|
|
389
|
+
echo " The endpoint must be InService before attaching inference components."
|
|
390
|
+
echo " Wait for the endpoint to become InService, or update do/config."
|
|
391
|
+
exit 4
|
|
392
|
+
fi
|
|
338
393
|
|
|
339
|
-
|
|
394
|
+
echo "✅ External endpoint is InService: ${ENDPOINT_NAME}"
|
|
395
|
+
# Skip directly to IC creation — no endpoint config, no endpoint creation, no wait
|
|
396
|
+
SKIP_TO="create_ic"
|
|
397
|
+
else
|
|
398
|
+
TIMESTAMP=$(date +%s)
|
|
399
|
+
ENDPOINT_NAME="${PROJECT_NAME}-endpoint-${TIMESTAMP}"
|
|
340
400
|
|
|
341
|
-
|
|
342
|
-
if ! aws sagemaker create-endpoint-config \
|
|
343
|
-
--endpoint-config-name "${ENDPOINT_CONFIG_NAME}" \
|
|
344
|
-
--execution-role-arn "${ROLE_ARN}" \
|
|
345
|
-
--production-variants "${VARIANT_JSON}" \
|
|
346
|
-
--region "${AWS_REGION}"; then
|
|
401
|
+
_update_config_var "ENDPOINT_NAME" "${ENDPOINT_NAME}"
|
|
347
402
|
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
echo " • The execution role ARN is valid"
|
|
351
|
-
echo " • The instance type is valid: ${INSTANCE_TYPE}"
|
|
352
|
-
echo " • The instance type is available in region: ${AWS_REGION}"
|
|
353
|
-
echo " • You have sufficient service quota for the instance type"
|
|
354
|
-
exit 4
|
|
355
|
-
fi
|
|
403
|
+
# Create endpoint configuration via shared helper
|
|
404
|
+
create_endpoint_config
|
|
356
405
|
|
|
357
|
-
|
|
406
|
+
_update_config_var "ENDPOINT_CONFIG_NAME" "${ENDPOINT_CONFIG_NAME}"
|
|
358
407
|
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
408
|
+
# Record endpoint config in manifest (non-blocking)
|
|
409
|
+
ENDPOINT_CONFIG_ARN="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:endpoint-config/${ENDPOINT_CONFIG_NAME}"
|
|
410
|
+
./do/manifest add \
|
|
411
|
+
--type sagemaker-endpoint-config \
|
|
412
|
+
--id "${ENDPOINT_CONFIG_ARN}" \
|
|
413
|
+
--project "${PROJECT_NAME}" \
|
|
414
|
+
--meta "{\"endpointConfigName\":\"${ENDPOINT_CONFIG_NAME}\",\"instanceType\":\"${INSTANCE_TYPE}\",\"region\":\"${AWS_REGION}\"}" \
|
|
415
|
+
2>/dev/null || true
|
|
367
416
|
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
417
|
+
# Step 2: Create endpoint
|
|
418
|
+
echo "🚀 Creating endpoint: ${ENDPOINT_NAME}"
|
|
419
|
+
if ! aws sagemaker create-endpoint \
|
|
420
|
+
--endpoint-name "${ENDPOINT_NAME}" \
|
|
421
|
+
--endpoint-config-name "${ENDPOINT_CONFIG_NAME}" \
|
|
422
|
+
--region "${AWS_REGION}"; then
|
|
374
423
|
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
424
|
+
echo "❌ Failed to create endpoint"
|
|
425
|
+
echo " Check that:"
|
|
426
|
+
echo " • Your IAM credentials have sagemaker:CreateEndpoint permission"
|
|
427
|
+
echo " • You have sufficient service quota in region: ${AWS_REGION}"
|
|
428
|
+
exit 4
|
|
429
|
+
fi
|
|
381
430
|
|
|
382
|
-
|
|
431
|
+
echo "✅ Endpoint creation initiated: ${ENDPOINT_NAME}"
|
|
383
432
|
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
433
|
+
# Record endpoint in manifest (non-blocking)
|
|
434
|
+
ENDPOINT_ARN="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:endpoint/${ENDPOINT_NAME}"
|
|
435
|
+
./do/manifest add \
|
|
436
|
+
--type sagemaker-endpoint \
|
|
437
|
+
--id "${ENDPOINT_ARN}" \
|
|
438
|
+
--project "${PROJECT_NAME}" \
|
|
439
|
+
--meta "{\"endpointName\":\"${ENDPOINT_NAME}\",\"instanceType\":\"${INSTANCE_TYPE}\",\"region\":\"${AWS_REGION}\"}" \
|
|
440
|
+
2>/dev/null || true
|
|
441
|
+
fi
|
|
392
442
|
fi
|
|
393
443
|
|
|
394
444
|
# ============================================================
|
|
395
|
-
# Wait for endpoint (skip if already InService)
|
|
445
|
+
# Wait for endpoint (skip if already InService or external)
|
|
396
446
|
# ============================================================
|
|
397
447
|
if [ -z "${SKIP_TO}" ] || [ "${SKIP_TO}" = "wait_endpoint" ]; then
|
|
398
448
|
echo "⏳ Waiting for endpoint to reach InService status..."
|
|
399
449
|
echo " This may take a few minutes..."
|
|
400
450
|
echo " If this times out, re-run ./do/deploy to resume."
|
|
401
451
|
|
|
402
|
-
|
|
403
|
-
--endpoint-name "${ENDPOINT_NAME}" \
|
|
404
|
-
--region "${AWS_REGION}"; then
|
|
405
|
-
|
|
406
|
-
# Check if it was a credential expiration vs actual failure
|
|
407
|
-
EP_CHECK=$(_get_endpoint_status "${ENDPOINT_NAME}" 2>/dev/null)
|
|
408
|
-
if [ "${EP_CHECK}" = "Creating" ]; then
|
|
409
|
-
echo ""
|
|
410
|
-
echo "⚠️ Wait interrupted (credentials may have expired), but endpoint is still creating."
|
|
411
|
-
echo " Refresh your credentials and re-run ./do/deploy to resume."
|
|
412
|
-
echo ""
|
|
413
|
-
echo " Or check status manually:"
|
|
414
|
-
echo " aws sagemaker describe-endpoint --endpoint-name ${ENDPOINT_NAME} --region ${AWS_REGION} --query EndpointStatus"
|
|
415
|
-
exit 4
|
|
416
|
-
fi
|
|
417
|
-
|
|
418
|
-
echo "❌ Endpoint failed to reach InService status"
|
|
419
|
-
echo " Check CloudWatch Logs for details:"
|
|
420
|
-
echo " https://console.aws.amazon.com/cloudwatch/home?region=${AWS_REGION}#logsV2:log-groups/log-group//aws/sagemaker/Endpoints/${ENDPOINT_NAME}"
|
|
421
|
-
exit 4
|
|
422
|
-
fi
|
|
452
|
+
wait_endpoint "${ENDPOINT_NAME}"
|
|
423
453
|
|
|
424
454
|
echo "✅ Endpoint is InService: ${ENDPOINT_NAME}"
|
|
425
455
|
fi
|
|
426
456
|
|
|
427
457
|
# ============================================================
|
|
428
|
-
# Step 3:
|
|
458
|
+
# Step 3: Deploy inference components (skip if resuming from wait_ic)
|
|
429
459
|
# ============================================================
|
|
430
460
|
if [ -z "${SKIP_TO}" ] || [ "${SKIP_TO}" = "create_ic" ] || [ "${SKIP_TO}" = "wait_endpoint" ]; then
|
|
431
|
-
# Generate new IC name if resuming after endpoint wait or failed IC
|
|
432
|
-
if [ "${SKIP_TO}" = "create_ic" ] || [ "${SKIP_TO}" = "wait_endpoint" ]; then
|
|
433
|
-
TIMESTAMP=$(date +%s)
|
|
434
|
-
IC_NAME="${PROJECT_NAME}-ic-${TIMESTAMP}"
|
|
435
|
-
_update_config_var "INFERENCE_COMPONENT_NAME" "${IC_NAME}"
|
|
436
|
-
fi
|
|
437
461
|
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
462
|
+
if [ -d "${SCRIPT_DIR}/ic" ]; then
|
|
463
|
+
# _check_gpu_capacity
# Best-effort capacity guardrail: sums IC_GPU_COUNT across all do/ic/*.conf
# and compares the total against a hardcoded GPU count for INSTANCE_TYPE.
#
# Globals:   INSTANCE_TYPE (read), SCRIPT_DIR (read)
# Arguments: none
# Outputs:   a warning on stdout when the total exceeds the instance GPU count
# Returns:   0 always — this check warns, it never fails the deploy
#
# The check is skipped when INSTANCE_TYPE is empty (external endpoints) or is
# not in the map below. Only do/ic/*.conf files are counted; adapter ICs
# (do/adapters/*.conf) share the base IC's GPU resources and have no
# ComputeResourceRequirements, so they are intentionally excluded.
_check_gpu_capacity() {
  # Skip check if no INSTANCE_TYPE (external endpoints)
  if [ -z "${INSTANCE_TYPE:-}" ]; then
    return 0
  fi

  # Hardcoded GPU counts for common SageMaker GPU instance types
  local instance_gpus=""
  case "${INSTANCE_TYPE}" in
    ml.g4dn.xlarge) instance_gpus=1 ;;
    ml.g4dn.12xlarge) instance_gpus=4 ;;
    ml.g5.xlarge) instance_gpus=1 ;;
    ml.g5.2xlarge) instance_gpus=1 ;;
    ml.g5.4xlarge) instance_gpus=1 ;;
    ml.g5.8xlarge) instance_gpus=1 ;;
    ml.g5.12xlarge) instance_gpus=4 ;;
    ml.g5.48xlarge) instance_gpus=8 ;;
    ml.g6.xlarge) instance_gpus=1 ;;
    ml.g6.12xlarge) instance_gpus=4 ;;
    ml.g6.48xlarge) instance_gpus=8 ;;
    ml.g6e.xlarge) instance_gpus=1 ;;
    ml.g6e.2xlarge) instance_gpus=1 ;;
    ml.g6e.4xlarge) instance_gpus=1 ;;
    ml.g6e.8xlarge) instance_gpus=1 ;;
    ml.g6e.12xlarge) instance_gpus=4 ;;
    ml.g6e.48xlarge) instance_gpus=8 ;;
    ml.g7e.xlarge) instance_gpus=1 ;;
    ml.g7e.2xlarge) instance_gpus=1 ;;
    ml.g7e.4xlarge) instance_gpus=1 ;;
    ml.g7e.8xlarge) instance_gpus=1 ;;
    ml.g7e.12xlarge) instance_gpus=4 ;;
    ml.g7e.48xlarge) instance_gpus=8 ;;
    ml.p3.2xlarge) instance_gpus=1 ;;
    ml.p3.8xlarge) instance_gpus=4 ;;
    ml.p3.16xlarge) instance_gpus=8 ;;
    ml.p4d.24xlarge) instance_gpus=8 ;;
    ml.p4de.24xlarge) instance_gpus=8 ;;
    ml.p5.48xlarge) instance_gpus=8 ;;
    *) instance_gpus="" ;;
  esac

  # Skip check if instance type not in map
  if [ -z "${instance_gpus}" ]; then
    return 0
  fi

  # Sum IC_GPU_COUNT across all IC config files
  local total_gpu_requested=0
  local conf ic_gpus
  for conf in "${SCRIPT_DIR}"/ic/*.conf; do
    [ -f "${conf}" ] || continue

    # Use the last assignment in the file, then strip a trailing "# comment",
    # quotes and whitespace so only the bare value remains.
    ic_gpus=$(
      sed -n 's/^export IC_GPU_COUNT=//p' "${conf}" 2>/dev/null \
        | tail -n 1 \
        | sed -e 's/[[:space:]]*#.*$//' -e "s/[\"']//g" -e 's/[[:space:]]//g'
    ) || ic_gpus=""

    # A missing or non-numeric value counts as 1 GPU. Feeding it to $(( ))
    # unchecked could abort the script, which a warn-only check must not do.
    case "${ic_gpus}" in
      '' | *[!0-9]*) ic_gpus=1 ;;
    esac

    # 10# forces base 10 so a value such as "08" is not parsed as octal.
    total_gpu_requested=$(( total_gpu_requested + 10#${ic_gpus} ))
  done

  if [ "${total_gpu_requested}" -gt "${instance_gpus}" ]; then
    echo ""
    echo "⚠️ GPU capacity warning: ICs request ${total_gpu_requested} GPUs total, but ${INSTANCE_TYPE} has ${instance_gpus} GPUs."
    echo " SageMaker will likely reject IC creation if capacity is exceeded."
    echo " Consider reducing IC_GPU_COUNT values or using a larger instance type."
    echo ""
  fi
}
|
|
472
539
|
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
540
|
+
# Run capacity guardrail before deploying ICs
|
|
541
|
+
_check_gpu_capacity
|
|
542
|
+
|
|
543
|
+
# _delete_and_wait_ic <ic_name>
# Deletes an inference component and waits for deletion to complete.
# Polls until the IC is no longer found (avoids name conflicts on recreate).
#
# Globals:   AWS_REGION (read)
#            IC_DELETE_TIMEOUT (read, optional) - max seconds to wait; default 600
#            IC_DELETE_POLL_INTERVAL (read, optional) - seconds between polls;
#              default 15
# Arguments: $1 - name of the inference component to delete
# Outputs:   progress messages on stdout
# Returns:   0 in every handled case: deleted, delete call failed, or timed
#            out. All three are deliberately non-fatal so the caller can go on
#            to create the replacement IC.
_delete_and_wait_ic() {
  local ic_name="${1:-}"
  local delete_timeout="${IC_DELETE_TIMEOUT:-600}" # 10 minutes max wait by default
  local poll_interval="${IC_DELETE_POLL_INTERVAL:-15}"

  # Nothing to delete without a name.
  if [ -z "${ic_name}" ]; then
    return 0
  fi

  # Fall back to the defaults if an override is not a plain non-negative integer.
  case "${delete_timeout}" in
    '' | *[!0-9]*) delete_timeout=600 ;;
  esac
  case "${poll_interval}" in
    '' | *[!0-9]*) poll_interval=15 ;;
  esac

  echo "🗑️ Deleting inference component: ${ic_name}"
  # stderr is discarded on purpose: a failed delete is treated as "IC already
  # gone" and reported with the warning below rather than the raw CLI error.
  if ! aws sagemaker delete-inference-component \
    --inference-component-name "${ic_name}" \
    --region "${AWS_REGION}" 2>/dev/null; then
    echo " ⚠️ Delete call failed (IC may already be gone). Continuing..."
    return 0
  fi

  echo " Waiting for deletion to complete..."
  local delete_start ic_status elapsed
  delete_start=$(date +%s)

  while true; do
    # An empty status is taken to mean the IC can no longer be found.
    ic_status=$(_get_ic_status "${ic_name}")

    if [ -z "${ic_status}" ]; then
      echo " ✅ Inference component deleted: ${ic_name}"
      break
    fi

    elapsed=$(( $(date +%s) - delete_start ))
    if [ "${elapsed}" -ge "${delete_timeout}" ]; then
      echo " ⚠️ Deletion timed out after ${delete_timeout}s. IC status: ${ic_status}"
      echo " Proceeding anyway — SageMaker may reject the new IC if name conflicts."
      break
    fi

    echo " $(date +%H:%M:%S) Deleting... (${ic_status}, ${elapsed}s elapsed)"
    sleep "${poll_interval}"
  done
}
|
|
582
|
+
|
|
583
|
+
# _deploy_single_ic <conf_file>
# Deploys a single IC with per-IC idempotency:
# - If FORCE_IC is true: delete existing IC, clear state, create fresh
# - If IC_DEPLOYED_NAME is set and InService → skip
# - If IC_DEPLOYED_NAME is set and Creating → wait for it
# - If IC_DEPLOYED_NAME is set and Failed → recreate
# - If IC_DEPLOYED_NAME is not set, or its status is unknown → create new IC
#
# Globals:   FORCE_IC, AWS_REGION, AWS_ACCOUNT_ID, PROJECT_NAME, ENDPOINT_NAME,
#            INSTANCE_TYPE (read); IC_DEPLOYED_NAME (read and written)
# Arguments: $1 - path to the IC config file (do/ic/<name>.conf)
# Outputs:   progress messages on stdout
# Returns:   0 once the IC is InService; non-zero if creating or waiting failed
#
# NOTE(review): create_inference_component is expected to set IC_DEPLOYED_NAME
# to the new IC's name — confirm against lib/inference-component.sh.
_deploy_single_ic() {
  local ic_conf="$1"
  local ic_basename
  ic_basename=$(basename "${ic_conf}" .conf)
  local rc=0

  # Read IC_DEPLOYED_NAME without sourcing the file, so its other variables
  # do not leak into this shell. Use the last assignment and strip optional
  # surrounding quotes, so unquoted and duplicated lines are also handled.
  local existing_ic_name=""
  existing_ic_name=$(
    sed -n 's/^export IC_DEPLOYED_NAME=//p' "${ic_conf}" 2>/dev/null \
      | tail -n 1 \
      | sed -e "s/^[\"']//" -e "s/[\"'][[:space:]]*$//"
  ) || existing_ic_name=""

  # Status checks below use "rc=$?" after a plain call instead of "cmd || ...".
  # This keeps errexit active inside the helper when the script runs under
  # `set -e`, while still catching failures when this function is called from
  # an `if !` condition, where errexit is suppressed.
  if [ "${FORCE_IC}" = true ]; then
    # --force-ic: delete existing IC before recreating
    if [ -n "${existing_ic_name}" ]; then
      echo "🔄 --force-ic: recreating IC '${ic_basename}'"
      _delete_and_wait_ic "${existing_ic_name}"

      # Clear deployed state from config before recreating
      _update_config_var "IC_DEPLOYED_NAME" "" "${ic_conf}"
      _update_config_var "IC_DEPLOYED_AT" "" "${ic_conf}"
      existing_ic_name=""
    fi

    create_inference_component "${ic_conf}"
    rc=$?
    [ "${rc}" -eq 0 ] || return "${rc}"
  elif [ -n "${existing_ic_name}" ]; then
    # IC was previously deployed — check its current status
    local ic_status
    ic_status=$(_get_ic_status "${existing_ic_name}")

    case "${ic_status}" in
      InService)
        echo "✅ IC '${ic_basename}' already InService: ${existing_ic_name} — skipping"
        IC_DEPLOYED_NAME="${existing_ic_name}"
        return 0
        ;;
      Creating)
        echo "⏳ IC '${ic_basename}' is still Creating: ${existing_ic_name} — waiting..."
        IC_DEPLOYED_NAME="${existing_ic_name}"
        wait_ic "${IC_DEPLOYED_NAME}"
        rc=$?
        [ "${rc}" -eq 0 ] || return "${rc}"
        echo "✅ Inference component is InService: ${IC_DEPLOYED_NAME}"
        return 0
        ;;
      Failed)
        echo "⚠️ IC '${ic_basename}' previously Failed: ${existing_ic_name} — recreating..."
        create_inference_component "${ic_conf}"
        rc=$?
        [ "${rc}" -eq 0 ] || return "${rc}"
        ;;
      *)
        echo " IC '${ic_basename}' has unknown/missing status for ${existing_ic_name} — creating new..."
        create_inference_component "${ic_conf}"
        rc=$?
        [ "${rc}" -eq 0 ] || return "${rc}"
        ;;
    esac
  else
    # No previous deployment — create new IC
    create_inference_component "${ic_conf}"
    rc=$?
    [ "${rc}" -eq 0 ] || return "${rc}"
  fi

  # Without a name there is nothing to wait for, and reporting success would
  # be wrong.
  if [ -z "${IC_DEPLOYED_NAME:-}" ]; then
    echo "❌ IC '${ic_basename}' was not created (no deployed name recorded)"
    return 1
  fi

  echo "⏳ Waiting for inference component to reach InService status..."
  echo " This may take 5-10 minutes..."

  wait_ic "${IC_DEPLOYED_NAME}"
  rc=$?
  [ "${rc}" -eq 0 ] || return "${rc}"

  echo "✅ Inference component is InService: ${IC_DEPLOYED_NAME}"

  # Record inference component in manifest (non-blocking): errors are
  # discarded so a manifest failure never fails the deploy.
  local ic_arn="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${IC_DEPLOYED_NAME}"
  ./do/manifest add \
    --type sagemaker-inference-component \
    --id "${ic_arn}" \
    --project "${PROJECT_NAME}" \
    --meta "{\"inferenceComponentName\":\"${IC_DEPLOYED_NAME}\",\"endpointName\":\"${ENDPOINT_NAME}\",\"instanceType\":\"${INSTANCE_TYPE:-external}\",\"region\":\"${AWS_REGION}\"}" \
    2>/dev/null || true
}
|
|
665
|
+
|
|
666
|
+
if [ -n "${IC_TARGET}" ]; then
|
|
667
|
+
# Single IC path: deploy only the named IC
|
|
506
668
|
echo ""
|
|
507
|
-
echo "
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
echo " Re-run ./do/deploy to resume waiting."
|
|
669
|
+
echo "── Deploying IC: ${IC_TARGET} ──"
|
|
670
|
+
_deploy_single_ic "${SCRIPT_DIR}/ic/${IC_TARGET}.conf"
|
|
671
|
+
else
|
|
672
|
+
# Multi-IC path: iterate all IC config files (alphabetical order)
|
|
673
|
+
IC_SUMMARY=""
|
|
674
|
+
IC_DEPLOY_FAILED=false
|
|
675
|
+
|
|
676
|
+
for conf in "${SCRIPT_DIR}"/ic/*.conf; do
|
|
677
|
+
[ -f "${conf}" ] || continue
|
|
678
|
+
local_ic_basename=$(basename "${conf}" .conf)
|
|
518
679
|
echo ""
|
|
519
|
-
echo "
|
|
520
|
-
|
|
680
|
+
echo "── Deploying IC: ${local_ic_basename} ──"
|
|
681
|
+
|
|
682
|
+
if ! _deploy_single_ic "${conf}"; then
|
|
683
|
+
echo "❌ IC '${local_ic_basename}' failed to deploy. Stopping."
|
|
684
|
+
IC_SUMMARY="${IC_SUMMARY} ${local_ic_basename}: FAILED\n"
|
|
685
|
+
IC_DEPLOY_FAILED=true
|
|
686
|
+
break
|
|
687
|
+
fi
|
|
688
|
+
|
|
689
|
+
IC_SUMMARY="${IC_SUMMARY} ${local_ic_basename}: ${IC_DEPLOYED_NAME} [InService]\n"
|
|
690
|
+
done
|
|
691
|
+
|
|
692
|
+
# Print summary
|
|
693
|
+
echo ""
|
|
694
|
+
echo "📋 IC Deployment Summary:"
|
|
695
|
+
echo -e "${IC_SUMMARY}"
|
|
696
|
+
|
|
697
|
+
if [ "${IC_DEPLOY_FAILED}" = true ]; then
|
|
698
|
+
echo "❌ Deployment stopped due to IC failure. Fix the issue and re-run ./do/deploy to resume."
|
|
521
699
|
exit 4
|
|
522
700
|
fi
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
701
|
+
fi
|
|
702
|
+
else
|
|
703
|
+
# Legacy single-IC path: no do/ic/ directory
|
|
704
|
+
create_inference_component_legacy
|
|
705
|
+
|
|
706
|
+
echo "⏳ Waiting for inference component to reach InService status..."
|
|
707
|
+
echo " This may take 5-10 minutes..."
|
|
708
|
+
echo " If this times out, re-run ./do/deploy to resume."
|
|
709
|
+
|
|
710
|
+
wait_ic "${IC_DEPLOYED_NAME}"
|
|
711
|
+
|
|
712
|
+
echo "✅ Inference component is InService: ${IC_DEPLOYED_NAME}"
|
|
713
|
+
|
|
714
|
+
# Record inference component in manifest (non-blocking)
|
|
715
|
+
IC_ARN="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${IC_DEPLOYED_NAME}"
|
|
716
|
+
./do/manifest add \
|
|
717
|
+
--type sagemaker-inference-component \
|
|
718
|
+
--id "${IC_ARN}" \
|
|
719
|
+
--project "${PROJECT_NAME}" \
|
|
720
|
+
--meta "{\"inferenceComponentName\":\"${IC_DEPLOYED_NAME}\",\"endpointName\":\"${ENDPOINT_NAME}\",\"instanceType\":\"${INSTANCE_TYPE:-external}\",\"region\":\"${AWS_REGION}\"}" \
|
|
721
|
+
2>/dev/null || true
|
|
722
|
+
fi
|
|
723
|
+
|
|
724
|
+
elif [ "${SKIP_TO}" = "wait_ic" ]; then
|
|
725
|
+
# Resuming: just wait for the IC that was already being created
|
|
726
|
+
echo "⏳ Waiting for inference component to reach InService status..."
|
|
727
|
+
echo " This may take 5-10 minutes..."
|
|
728
|
+
echo " If this times out, re-run ./do/deploy to resume."
|
|
729
|
+
|
|
730
|
+
wait_ic "${IC_DEPLOYED_NAME}"
|
|
731
|
+
|
|
732
|
+
echo "✅ Inference component is InService: ${IC_DEPLOYED_NAME}"
|
|
733
|
+
fi
|
|
537
734
|
|
|
538
735
|
echo "✅ Deployment complete!"
|
|
539
736
|
echo ""
|
|
540
737
|
echo "📋 Deployment Details:"
|
|
541
738
|
echo " Endpoint: ${ENDPOINT_NAME}"
|
|
542
|
-
|
|
543
|
-
echo "
|
|
544
|
-
echo " Region: ${AWS_REGION}"
|
|
545
|
-
|
|
739
|
+
if [ "${ENDPOINT_EXTERNAL:-false}" = "true" ]; then
|
|
740
|
+
echo " Endpoint Config: (external — not managed by this project)"
|
|
741
|
+
echo " Region: ${AWS_REGION}"
|
|
742
|
+
else
|
|
743
|
+
echo " Endpoint Config: ${ENDPOINT_CONFIG_NAME:-N/A}"
|
|
744
|
+
echo " Region: ${AWS_REGION}"
|
|
745
|
+
echo " Instance Type: ${INSTANCE_TYPE}"
|
|
746
|
+
fi
|
|
546
747
|
echo " Image: ${ECR_REPOSITORY}:${IMAGE_TAG}"
|
|
547
748
|
echo ""
|
|
548
|
-
echo "
|
|
549
|
-
echo " ./do/test"
|
|
550
|
-
|
|
551
|
-
echo "
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
echo "
|
|
555
|
-
|
|
556
|
-
echo "
|
|
557
|
-
echo ""
|
|
558
|
-
echo "
|
|
559
|
-
|
|
749
|
+
echo "📋 What's next?"
|
|
750
|
+
echo " • Test your endpoint: ./do/test"
|
|
751
|
+
<% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
|
|
752
|
+
echo " • Benchmark performance: ./do/benchmark"
|
|
753
|
+
<% } %>
|
|
754
|
+
<% if (typeof enableLora !== 'undefined' && enableLora) { %>
|
|
755
|
+
echo " • Add a LoRA adapter: ./do/adapter add <name> --weights s3://..."
|
|
756
|
+
<% } %>
|
|
757
|
+
echo " • View endpoint status: ./do/status"
|
|
758
|
+
echo " • Register this deployment: ./do/register"
|
|
759
|
+
echo " • View logs: ./do/logs"
|
|
760
|
+
<% if (!(typeof existingEndpointName !== 'undefined' && existingEndpointName)) { %>
|
|
761
|
+
echo " • Clean up when done: ./do/clean endpoint"
|
|
762
|
+
<% } %>
|
|
560
763
|
|
|
561
764
|
<% } else if (deploymentTarget === 'async-inference') { %>
|
|
562
765
|
# ============================================================
|
|
@@ -565,6 +768,13 @@ echo " ./do/clean endpoint"
|
|
|
565
768
|
# Flow: create-model → create-endpoint-config (with AsyncInferenceConfig) → create-endpoint
|
|
566
769
|
# ============================================================
|
|
567
770
|
|
|
771
|
+
# Source shared helpers
|
|
772
|
+
source "${SCRIPT_DIR}/lib/secrets.sh"
|
|
773
|
+
source "${SCRIPT_DIR}/lib/wait.sh"
|
|
774
|
+
|
|
775
|
+
# Resolve container secrets (HF_TOKEN, NGC_API_KEY)
|
|
776
|
+
resolve_secrets
|
|
777
|
+
|
|
568
778
|
# Validate execution role ARN
|
|
569
779
|
if [ -z "${ROLE_ARN:-}" ]; then
|
|
570
780
|
echo "❌ Execution role ARN not provided"
|
|
@@ -727,27 +937,6 @@ ASYNC_SNS_ERROR_TOPIC_NAME=$(echo "${ASYNC_SNS_ERROR_TOPIC}" | awk -F: '{print $
|
|
|
727
937
|
# Flow: create-model → create-endpoint-config (with AsyncInferenceConfig) → create-endpoint
|
|
728
938
|
# ============================================================
|
|
729
939
|
|
|
730
|
-
# Helper: persist a variable to do/config so other scripts can use it
|
|
731
|
-
_update_config_var() {
|
|
732
|
-
local var_name="$1" var_value="$2" config_file="${SCRIPT_DIR}/config"
|
|
733
|
-
if grep -q "^export ${var_name}=" "${config_file}" 2>/dev/null; then
|
|
734
|
-
sed -i.bak "s|^export ${var_name}=.*|export ${var_name}=\"${var_value}\"|" "${config_file}"
|
|
735
|
-
rm -f "${config_file}.bak"
|
|
736
|
-
else
|
|
737
|
-
echo "" >> "${config_file}"
|
|
738
|
-
echo "export ${var_name}=\"${var_value}\"" >> "${config_file}"
|
|
739
|
-
fi
|
|
740
|
-
}
|
|
741
|
-
|
|
742
|
-
# Helper: query a SageMaker resource status, returns empty string if not found
|
|
743
|
-
_get_endpoint_status() {
|
|
744
|
-
aws sagemaker describe-endpoint \
|
|
745
|
-
--endpoint-name "$1" \
|
|
746
|
-
--region "${AWS_REGION}" \
|
|
747
|
-
--query EndpointStatus \
|
|
748
|
-
--output text 2>/dev/null || echo ""
|
|
749
|
-
}
|
|
750
|
-
|
|
751
940
|
# ============================================================
|
|
752
941
|
# Idempotency: check for existing deployment from a previous run
|
|
753
942
|
# ============================================================
|
|
@@ -923,27 +1112,7 @@ if [ -z "${SKIP_TO}" ] || [ "${SKIP_TO}" = "wait_endpoint" ]; then
|
|
|
923
1112
|
echo " This may take several minutes..."
|
|
924
1113
|
echo " If this times out, re-run ./do/deploy to resume."
|
|
925
1114
|
|
|
926
|
-
|
|
927
|
-
--endpoint-name "${ENDPOINT_NAME}" \
|
|
928
|
-
--region "${AWS_REGION}"; then
|
|
929
|
-
|
|
930
|
-
# Check if it was a credential expiration vs actual failure
|
|
931
|
-
EP_CHECK=$(_get_endpoint_status "${ENDPOINT_NAME}" 2>/dev/null)
|
|
932
|
-
if [ "${EP_CHECK}" = "Creating" ]; then
|
|
933
|
-
echo ""
|
|
934
|
-
echo "⚠️ Wait interrupted (credentials may have expired), but endpoint is still creating."
|
|
935
|
-
echo " Refresh your credentials and re-run ./do/deploy to resume."
|
|
936
|
-
echo ""
|
|
937
|
-
echo " Or check status manually:"
|
|
938
|
-
echo " aws sagemaker describe-endpoint --endpoint-name ${ENDPOINT_NAME} --region ${AWS_REGION} --query EndpointStatus"
|
|
939
|
-
exit 4
|
|
940
|
-
fi
|
|
941
|
-
|
|
942
|
-
echo "❌ Async endpoint failed to reach InService status"
|
|
943
|
-
echo " Check CloudWatch Logs for details:"
|
|
944
|
-
echo " https://console.aws.amazon.com/cloudwatch/home?region=${AWS_REGION}#logsV2:log-groups/log-group//aws/sagemaker/Endpoints/${ENDPOINT_NAME}"
|
|
945
|
-
exit 4
|
|
946
|
-
fi
|
|
1115
|
+
wait_endpoint "${ENDPOINT_NAME}"
|
|
947
1116
|
fi
|
|
948
1117
|
|
|
949
1118
|
echo "✅ Async deployment complete!"
|
|
@@ -959,17 +1128,15 @@ echo " S3 Output: ${ASYNC_S3_OUTPUT_PATH}"
|
|
|
959
1128
|
echo " SNS Success: ${ASYNC_SNS_SUCCESS_TOPIC}"
|
|
960
1129
|
echo " SNS Error: ${ASYNC_SNS_ERROR_TOPIC}"
|
|
961
1130
|
echo ""
|
|
962
|
-
echo "
|
|
963
|
-
echo " ./do/test"
|
|
964
|
-
echo ""
|
|
965
|
-
|
|
966
|
-
echo " ./do/
|
|
967
|
-
|
|
968
|
-
echo "
|
|
969
|
-
echo "
|
|
970
|
-
echo ""
|
|
971
|
-
echo "🧹 Clean up when done:"
|
|
972
|
-
echo " ./do/clean endpoint"
|
|
1131
|
+
echo "📋 What's next?"
|
|
1132
|
+
echo " • Test your async endpoint: ./do/test"
|
|
1133
|
+
echo " • Check async output: aws s3 ls ${ASYNC_S3_OUTPUT_PATH}"
|
|
1134
|
+
<% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
|
|
1135
|
+
echo " • Benchmark performance: ./do/benchmark"
|
|
1136
|
+
<% } %>
|
|
1137
|
+
echo " • Register this deployment: ./do/register"
|
|
1138
|
+
echo " • View logs: ./do/logs"
|
|
1139
|
+
echo " • Clean up when done: ./do/clean endpoint"
|
|
973
1140
|
|
|
974
1141
|
<% } else if (deploymentTarget === 'hyperpod-eks') { %>
|
|
975
1142
|
# ============================================================
|
|
@@ -1165,22 +1332,16 @@ echo " Deployment: ${PROJECT_NAME}"
|
|
|
1165
1332
|
echo " Replicas: ${HYPERPOD_REPLICAS}"
|
|
1166
1333
|
echo " Image: ${ECR_REPOSITORY}:${IMAGE_TAG}"
|
|
1167
1334
|
echo ""
|
|
1168
|
-
echo "
|
|
1169
|
-
echo "
|
|
1170
|
-
echo " kubectl get pods -n ${HYPERPOD_NAMESPACE}"
|
|
1171
|
-
echo " kubectl
|
|
1172
|
-
|
|
1173
|
-
echo "
|
|
1174
|
-
|
|
1175
|
-
echo ""
|
|
1176
|
-
echo "
|
|
1177
|
-
echo " ./do/
|
|
1178
|
-
echo ""
|
|
1179
|
-
echo "📋 View logs:"
|
|
1180
|
-
echo " ./do/logs"
|
|
1181
|
-
echo ""
|
|
1182
|
-
echo "🧹 Clean up when done:"
|
|
1183
|
-
echo " ./do/clean hyperpod"
|
|
1335
|
+
echo "📋 What's next?"
|
|
1336
|
+
echo " • Test your deployment: ./do/test"
|
|
1337
|
+
echo " • Check pod status: kubectl get pods -n ${HYPERPOD_NAMESPACE}"
|
|
1338
|
+
echo " • View pod logs: kubectl logs -n ${HYPERPOD_NAMESPACE} -l app=${PROJECT_NAME}"
|
|
1339
|
+
<% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
|
|
1340
|
+
echo " • Benchmark performance: ./do/benchmark"
|
|
1341
|
+
<% } %>
|
|
1342
|
+
echo " • Register this deployment: ./do/register"
|
|
1343
|
+
echo " • View logs: ./do/logs"
|
|
1344
|
+
echo " • Clean up when done: ./do/clean hyperpod"
|
|
1184
1345
|
|
|
1185
1346
|
# Write kubeconfig path to config so other scripts can use it (idempotent)
|
|
1186
1347
|
_update_config_var() {
|
|
@@ -1202,6 +1363,13 @@ _update_config_var "KUBECONFIG" "${KUBECONFIG_PATH}"
|
|
|
1202
1363
|
# Flow: create-model → create-transform-job → poll until completion
|
|
1203
1364
|
# ============================================================
|
|
1204
1365
|
|
|
1366
|
+
# Source shared helpers
|
|
1367
|
+
source "${SCRIPT_DIR}/lib/secrets.sh"
|
|
1368
|
+
source "${SCRIPT_DIR}/lib/wait.sh"
|
|
1369
|
+
|
|
1370
|
+
# Resolve container secrets (HF_TOKEN, NGC_API_KEY)
|
|
1371
|
+
resolve_secrets
|
|
1372
|
+
|
|
1205
1373
|
# Validate execution role ARN
|
|
1206
1374
|
if [ -z "${ROLE_ARN:-}" ]; then
|
|
1207
1375
|
echo "❌ Execution role ARN not provided"
|
|
@@ -1354,18 +1522,6 @@ fi
|
|
|
1354
1522
|
echo "✅ Using custom S3 output path: ${BATCH_OUTPUT_PATH}"
|
|
1355
1523
|
<% } %>
|
|
1356
1524
|
|
|
1357
|
-
# Helper: persist a variable to do/config so other scripts can use it
|
|
1358
|
-
_update_config_var() {
|
|
1359
|
-
local var_name="$1" var_value="$2" config_file="${SCRIPT_DIR}/config"
|
|
1360
|
-
if grep -q "^export ${var_name}=" "${config_file}" 2>/dev/null; then
|
|
1361
|
-
sed -i.bak "s|^export ${var_name}=.*|export ${var_name}=\"${var_value}\"|" "${config_file}"
|
|
1362
|
-
rm -f "${config_file}.bak"
|
|
1363
|
-
else
|
|
1364
|
-
echo "" >> "${config_file}"
|
|
1365
|
-
echo "export ${var_name}=\"${var_value}\"" >> "${config_file}"
|
|
1366
|
-
fi
|
|
1367
|
-
}
|
|
1368
|
-
|
|
1369
1525
|
# ============================================================
|
|
1370
1526
|
# Check for previous transform job still running
|
|
1371
1527
|
# ============================================================
|
|
@@ -1600,16 +1756,11 @@ else
|
|
|
1600
1756
|
fi
|
|
1601
1757
|
|
|
1602
1758
|
echo ""
|
|
1603
|
-
echo "
|
|
1604
|
-
echo "
|
|
1605
|
-
echo ""
|
|
1606
|
-
echo "
|
|
1607
|
-
echo " ./do/
|
|
1608
|
-
echo ""
|
|
1609
|
-
echo "📋 View logs:"
|
|
1610
|
-
echo " ./do/logs"
|
|
1611
|
-
echo ""
|
|
1612
|
-
echo "🧹 Clean up when done:"
|
|
1613
|
-
echo " ./do/clean"
|
|
1759
|
+
echo "📋 What's next?"
|
|
1760
|
+
echo " • View results: cat batch-output/"
|
|
1761
|
+
echo " • Review results: ./do/test"
|
|
1762
|
+
echo " • Register this deployment: ./do/register"
|
|
1763
|
+
echo " • View logs: ./do/logs"
|
|
1764
|
+
echo " • Clean up when done: ./do/clean"
|
|
1614
1765
|
|
|
1615
1766
|
<% } %>
|