@aws/ml-container-creator 0.9.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +31 -137
- package/config/parameter-schema-v2.json +2065 -0
- package/package.json +6 -3
- package/servers/lib/catalogs/jumpstart-public.json +101 -16
- package/servers/lib/catalogs/models.json +182 -26
- package/src/app.js +6 -389
- package/src/lib/bootstrap-command-handler.js +75 -1078
- package/src/lib/bootstrap-profile-manager.js +634 -0
- package/src/lib/bootstrap-provisioners.js +421 -0
- package/src/lib/config-loader.js +405 -0
- package/src/lib/config-manager.js +59 -1668
- package/src/lib/config-mcp-client.js +118 -0
- package/src/lib/config-validator.js +634 -0
- package/src/lib/cuda-resolver.js +140 -0
- package/src/lib/e2e-catalog-validator.js +251 -3
- package/src/lib/e2e-ci-recorder.js +103 -0
- package/src/lib/generated/cli-options.js +471 -0
- package/src/lib/generated/parameter-matrix.js +671 -0
- package/src/lib/generated/validation-rules.js +202 -0
- package/src/lib/marketplace-flow.js +276 -0
- package/src/lib/mcp-query-runner.js +768 -0
- package/src/lib/parameter-schema-validator.js +62 -18
- package/src/lib/prompt-runner.js +41 -1504
- package/src/lib/prompts/feature-prompts.js +172 -0
- package/src/lib/prompts/index.js +48 -0
- package/src/lib/prompts/infrastructure-prompts.js +690 -0
- package/src/lib/prompts/model-prompts.js +552 -0
- package/src/lib/prompts/project-prompts.js +70 -0
- package/src/lib/prompts.js +2 -1446
- package/src/lib/registry-command-handler.js +135 -3
- package/src/lib/secrets-prompt-runner.js +251 -0
- package/src/lib/template-variable-resolver.js +398 -0
- package/templates/code/serve +5 -134
- package/templates/code/serve.d/lmi.ejs +19 -0
- package/templates/code/serve.d/sglang.ejs +47 -0
- package/templates/code/serve.d/tensorrt-llm.ejs +53 -0
- package/templates/code/serve.d/vllm.ejs +48 -0
- package/templates/do/clean +1 -1387
- package/templates/do/clean.d/async-inference.ejs +508 -0
- package/templates/do/clean.d/batch-transform.ejs +512 -0
- package/templates/do/clean.d/hyperpod-eks.ejs +481 -0
- package/templates/do/clean.d/managed-inference.ejs +1043 -0
- package/templates/do/deploy +1 -1766
- package/templates/do/deploy.d/async-inference.ejs +501 -0
- package/templates/do/deploy.d/batch-transform.ejs +529 -0
- package/templates/do/deploy.d/hyperpod-eks.ejs +339 -0
- package/templates/do/deploy.d/managed-inference.ejs +726 -0
- package/config/parameter-schema.json +0 -88
|
@@ -0,0 +1,726 @@
|
|
|
1
|
+
#!/bin/bash
|
|
2
|
+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
set -e
|
|
6
|
+
set -u
|
|
7
|
+
set -o pipefail
|
|
8
|
+
|
|
9
|
+
# Parse flags
#
# Recognised options (full text under --help below):
#   --force            ignore any previous deployment and create new resources
#   --force-ic [name]  recreate every IC, or only <name>, on the existing endpoint
#   --ic <name>        deploy only the IC described by do/ic/<name>.conf
#
# The outcome is left in the globals FORCE_NEW, FORCE_IC and IC_TARGET.
FORCE_NEW=false
FORCE_IC=false
IC_TARGET=""

# _parse_deploy_flags <args...>
# Walks the argument list and fills in FORCE_NEW / FORCE_IC / IC_TARGET.
# Exits 0 after printing help; exits 1 on an unknown option or when --ic is
# not followed by a name.
_parse_deploy_flags() {
  while [ $# -gt 0 ]; do
    case "$1" in
      --force)
        FORCE_NEW=true
        shift
        ;;
      --force-ic)
        FORCE_IC=true
        shift
        # Optional name argument: --force-ic <name>
        if [ $# -gt 0 ] && [[ "$1" != --* ]]; then
          IC_TARGET="$1"
          shift
        fi
        ;;
      --ic)
        # The name is mandatory. A next token that looks like another option
        # (e.g. "--ic --force") means the name was left out; without this
        # check the option itself would be swallowed as the IC name. This
        # mirrors the "--*" test used for --force-ic above.
        if [ -z "${2:-}" ] || [[ "$2" == --* ]]; then
          echo "❌ --ic requires a name argument"
          echo " Usage: ./do/deploy --ic <name>"
          exit 1
        fi
        IC_TARGET="$2"
        shift 2
        ;;
      --help | -h)
        echo "Usage: ./do/deploy [--force] [--force-ic [<name>]] [--ic <name>]"
        echo ""
        echo "Options:"
        echo " --force Create a new endpoint and IC, even if one already exists."
        echo " --force-ic Recreate ALL inference components on the existing endpoint."
        echo " --force-ic <name> Recreate only the named IC on the existing endpoint."
        echo " --ic <name> Deploy only the named IC (from do/ic/<name>.conf)."
        echo ""
        echo "Without flags, deploy resumes from the last run."
        exit 0
        ;;
      *)
        echo "❌ Unknown option: $1"
        echo " Run ./do/deploy --help for usage."
        exit 1
        ;;
    esac
  done
}

_parse_deploy_flags "$@"
|
|
53
|
+
|
|
54
|
+
# Source configuration
|
|
55
|
+
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
56
|
+
source "${SCRIPT_DIR}/config"
|
|
57
|
+
|
|
58
|
+
echo "🚀 Deploying to AWS"
|
|
59
|
+
echo " Project: ${PROJECT_NAME}"
|
|
60
|
+
echo " Deployment config: ${DEPLOYMENT_CONFIG}"
|
|
61
|
+
echo " Region: ${AWS_REGION}"
|
|
62
|
+
echo " Build target: ${BUILD_TARGET}"
|
|
63
|
+
echo " Deployment target: ${DEPLOYMENT_TARGET}"
|
|
64
|
+
if [ "${ENDPOINT_EXTERNAL:-false}" = "true" ]; then
|
|
65
|
+
echo " Endpoint: ${ENDPOINT_NAME} (external)"
|
|
66
|
+
else
|
|
67
|
+
echo " Instance type: ${INSTANCE_TYPE}"
|
|
68
|
+
fi
|
|
69
|
+
|
|
70
|
+
# Check AWS credentials
# One STS call both proves the credentials work and yields the account ID, so
# the caller identity is fetched once and a failure always lands in the
# friendly error path below (exit code 4) instead of tripping `set -e` on a
# second, unguarded call.
echo "🔍 Validating AWS credentials..."
if ! AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text 2> /dev/null); then
  echo "❌ AWS credentials not configured"
  echo " Run: aws configure"
  echo " Or set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables"
  exit 4
fi

echo "✅ AWS credentials validated (Account: ${AWS_ACCOUNT_ID})"
|
|
81
|
+
|
|
82
|
+
# Construct ECR repository URL
|
|
83
|
+
ECR_REPOSITORY="${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/${ECR_REPOSITORY_NAME}"
|
|
84
|
+
|
|
85
|
+
# ============================================================
# Shared: Verify ECR image exists
# ============================================================
# Looks up the tag "<PROJECT_NAME>-latest" in the project's ECR repository.
# When the lookup fails the user is pointed at ./do/submit and the script
# stops with exit code 4; otherwise IMAGE_TAG is set for the rest of the run.
echo "🔍 Verifying ECR image exists..."
aws ecr describe-images \
  --repository-name "${ECR_REPOSITORY_NAME}" \
  --image-ids imageTag="${PROJECT_NAME}-latest" \
  --region "${AWS_REGION}" > /dev/null 2>&1 || {
  echo "❌ ECR image not found: ${ECR_REPOSITORY}:${PROJECT_NAME}-latest"
  echo ""
  echo "Please build and push your image first:"
  echo " ./do/submit"
  echo ""
  echo "After the build completes successfully, run this deploy script again."
  exit 4
}

IMAGE_TAG="${PROJECT_NAME}-latest"
echo "✅ ECR image found: ${ECR_REPOSITORY}:${PROJECT_NAME}-latest"
|
|
105
|
+
|
|
106
|
+
# ============================================================
# Shared: Resolve secrets for container environment
# ============================================================
# Builds CONTAINER_ENV_JSON: a comma-separated list of JSON "key":"value"
# members (no surrounding braces) carrying HF_TOKEN and/or NGC_API_KEY.
# For each secret a Secrets Manager ARN (*_ARN) wins over a plain value in
# the environment; when neither is set the secret is simply left out.
# A failed Secrets Manager lookup ends the run with exit code 3.
CONTAINER_ENV_JSON=""

# _append_container_env <key> <value>
# Appends one "key":"value" member to CONTAINER_ENV_JSON, adding the comma
# separator when the list already has content. Backslashes and double quotes
# in <value> are escaped so that an unusual secret cannot terminate the JSON
# string early. The value travels through stdin, never through argv.
# NOTE(review): raw control characters (e.g. embedded newlines) are not
# escaped here.
_append_container_env() {
  local key="$1"
  local value="$2"

  value=$(printf '%s' "${value}" | sed -e 's/\\/\\\\/g' -e 's/"/\\"/g')

  if [ -n "${CONTAINER_ENV_JSON}" ]; then
    CONTAINER_ENV_JSON="${CONTAINER_ENV_JSON},\"${key}\":\"${value}\""
  else
    CONTAINER_ENV_JSON="\"${key}\":\"${value}\""
  fi
}

if [ -n "${HF_TOKEN_ARN:-}" ]; then
  echo "🔐 Resolving HuggingFace token from Secrets Manager..."
  RESOLVED_HF_TOKEN=$(aws secretsmanager get-secret-value --secret-id "${HF_TOKEN_ARN}" --query SecretString --output text --region "${AWS_REGION}") || {
    echo "❌ Failed to resolve HuggingFace token from Secrets Manager"
    exit 3
  }
  _append_container_env "HF_TOKEN" "${RESOLVED_HF_TOKEN}"
elif [ -n "${HF_TOKEN:-}" ]; then
  _append_container_env "HF_TOKEN" "${HF_TOKEN}"
fi

if [ -n "${NGC_API_KEY_ARN:-}" ]; then
  echo "🔐 Resolving NGC API key from Secrets Manager..."
  RESOLVED_NGC_KEY=$(aws secretsmanager get-secret-value --secret-id "${NGC_API_KEY_ARN}" --query SecretString --output text --region "${AWS_REGION}") || {
    echo "❌ Failed to resolve NGC API key from Secrets Manager"
    exit 3
  }
  _append_container_env "NGC_API_KEY" "${RESOLVED_NGC_KEY}"
elif [ -n "${NGC_API_KEY:-}" ]; then
  _append_container_env "NGC_API_KEY" "${NGC_API_KEY}"
fi
|
|
140
|
+
|
|
141
|
+
# ============================================================
|
|
142
|
+
# SageMaker Real-Time Inference Deployment (Inference Components)
|
|
143
|
+
# ============================================================
|
|
144
|
+
|
|
145
|
+
# Source shared helpers
|
|
146
|
+
source "${SCRIPT_DIR}/lib/secrets.sh"
|
|
147
|
+
source "${SCRIPT_DIR}/lib/wait.sh"
|
|
148
|
+
source "${SCRIPT_DIR}/lib/endpoint-config.sh"
|
|
149
|
+
source "${SCRIPT_DIR}/lib/inference-component.sh"
|
|
150
|
+
|
|
151
|
+
# Validate execution role ARN
# ROLE_ARN may come from the environment or from do/config. Without it the
# usage notes below are printed and the run stops with exit code 3.
if [ -z "${ROLE_ARN:-}" ]; then
  printf '%s\n' \
    "❌ Execution role ARN not provided" \
    "" \
    "Usage:" \
    " export ROLE_ARN=arn:aws:iam::ACCOUNT_ID:role/YOUR_ROLE" \
    " ./do/deploy" \
    "" \
    "Or set ROLE_ARN in do/config" \
    "" \
    "The execution role must have permissions for:" \
    " • SageMaker endpoint and inference component management" \
    " • ECR image access" \
    " • S3 access (if using model artifacts)" \
    " • CloudWatch Logs"
  exit 3
fi

echo " Using execution role: ${ROLE_ARN}"
|
|
170
|
+
|
|
171
|
+
# Validate --ic argument if specified (set by --ic <name> or --force-ic <name>)
# A target name only makes sense for a multi-IC project: do/ic/ must exist
# and must contain <name>.conf. Either check failing ends the run with exit
# code 1; the second one also lists the IC names that are available.
if [ -n "${IC_TARGET}" ] && [ ! -d "${SCRIPT_DIR}/ic" ]; then
  echo "❌ IC name specified but no do/ic/ directory found"
  echo " This project does not use multi-IC configuration."
  echo " Remove --ic/--force-ic <name> to deploy using the legacy single-IC path."
  exit 1
fi

if [ -n "${IC_TARGET}" ] && [ ! -f "${SCRIPT_DIR}/ic/${IC_TARGET}.conf" ]; then
  echo "❌ IC config not found: do/ic/${IC_TARGET}.conf"
  echo ""
  echo " Available ICs:"
  for available_conf in "${SCRIPT_DIR}"/ic/*.conf; do
    # An unmatched glob stays literal; skip it.
    if [ -f "${available_conf}" ]; then
      echo " • $(basename "${available_conf}" .conf)"
    fi
  done
  echo ""
  echo " Usage: ./do/deploy --ic <name>"
  exit 1
fi
|
|
192
|
+
|
|
193
|
+
# Resolve container secrets (HF_TOKEN, NGC_API_KEY)
|
|
194
|
+
resolve_secrets
|
|
195
|
+
|
|
196
|
+
# ============================================================
# Idempotency: check for existing deployment from a previous run
# ============================================================
# Decides where this run picks up. SKIP_TO ends up as one of:
#   ""             nothing usable found (or --force): start from step 1
#   wait_endpoint  the recorded endpoint is still Creating/Updating
#   create_ic      the endpoint is InService; (re)create inference component(s)
#   wait_ic        the recorded IC is still Creating; only wait for it
# When the endpoint and an inference component are both already InService,
# that is reported and the script exits 0 right here.
SKIP_TO=""

# _report_live_deployment_and_exit <ic_name>
# Prints the "already live" notice for ENDPOINT_NAME and <ic_name>, then
# exits the script with status 0.
_report_live_deployment_and_exit() {
  echo ""
  echo "📋 Deployment is already live. Nothing to do."
  echo " Endpoint: ${ENDPOINT_NAME}"
  echo " Inference Component: $1"
  echo ""
  echo "🧪 Test your endpoint:"
  echo " ./do/test"
  echo ""
  echo "🧹 Clean up when done:"
  echo " ./do/clean endpoint"
  exit 0
}

if [ "${FORCE_NEW}" = true ]; then
  echo "🔄 --force: ignoring previous deployment, creating new resources."
elif [ "${FORCE_IC}" = true ] && [ -n "${ENDPOINT_NAME:-}" ]; then
  EP_STATUS=$(_get_endpoint_status "${ENDPOINT_NAME}")
  if [ "${EP_STATUS}" != "InService" ]; then
    echo "⚠️ --force-ic requires an InService endpoint, but ${ENDPOINT_NAME} is: ${EP_STATUS:-not found}"
    echo " Use --force to create a new endpoint, or wait for the current one."
    exit 4
  fi

  if [ -n "${IC_TARGET}" ]; then
    echo "🔄 --force-ic: recreating IC '${IC_TARGET}' on existing endpoint: ${ENDPOINT_NAME}"
  else
    echo "🔄 --force-ic: recreating ALL inference components on existing endpoint: ${ENDPOINT_NAME}"
  fi
  SKIP_TO="create_ic"
elif [ -n "${ENDPOINT_NAME:-}" ]; then
  echo "🔍 Checking for existing deployment: ${ENDPOINT_NAME}"

  EP_STATUS=$(_get_endpoint_status "${ENDPOINT_NAME}")

  case "${EP_STATUS}" in
    InService)
      echo "✅ Endpoint already InService: ${ENDPOINT_NAME}"

      if [ -z "${INFERENCE_COMPONENT_NAME:-}" ]; then
        # No IC name in config — check if one is already running on the endpoint
        if [ "${ENDPOINT_EXTERNAL:-false}" = "true" ]; then
          # External endpoint: never adopt ICs we didn't create
          echo " No previous IC deployed by this project. Will create a new one."
        else
          LIVE_IC=$(_find_active_ic_on_endpoint "${ENDPOINT_NAME}")
          if [ -n "${LIVE_IC}" ] && [ "${LIVE_IC}" != "None" ]; then
            echo "✅ Found running inference component on endpoint: ${LIVE_IC}"
            _update_config_var "INFERENCE_COMPONENT_NAME" "${LIVE_IC}"
            _report_live_deployment_and_exit "${LIVE_IC}"
          fi
        fi
        SKIP_TO="create_ic"
      else
        # Check the inference component recorded in config
        IC_STATUS=$(_get_ic_status "${INFERENCE_COMPONENT_NAME}")

        case "${IC_STATUS}" in
          InService)
            echo "✅ Inference component already InService: ${INFERENCE_COMPONENT_NAME}"
            _report_live_deployment_and_exit "${INFERENCE_COMPONENT_NAME}"
            ;;
          Creating)
            echo "⏳ Inference component still creating: ${INFERENCE_COMPONENT_NAME}"
            SKIP_TO="wait_ic"
            IC_DEPLOYED_NAME="${INFERENCE_COMPONENT_NAME}"
            ;;
          Failed)
            echo "⚠️ Inference component failed: ${INFERENCE_COMPONENT_NAME}"
            echo " Will create a new inference component on the existing endpoint."
            SKIP_TO="create_ic"
            ;;
          *)
            # Stored IC not found — check if a different IC is running on this endpoint
            if [ "${ENDPOINT_EXTERNAL:-false}" = "true" ]; then
              # External endpoint: never adopt ICs we didn't create
              echo " Stored IC not found on external endpoint. Will create a new one."
            else
              LIVE_IC=$(_find_active_ic_on_endpoint "${ENDPOINT_NAME}")
              if [ -n "${LIVE_IC}" ] && [ "${LIVE_IC}" != "None" ]; then
                echo "✅ Found running inference component on endpoint: ${LIVE_IC}"
                echo " (config had stale reference: ${INFERENCE_COMPONENT_NAME})"
                _update_config_var "INFERENCE_COMPONENT_NAME" "${LIVE_IC}"
                _report_live_deployment_and_exit "${LIVE_IC}"
              fi
              echo " No existing inference component found on endpoint. Will create one."
            fi
            SKIP_TO="create_ic"
            ;;
        esac
      fi
      ;;
    Creating | Updating)
      echo "⏳ Endpoint still ${EP_STATUS}: ${ENDPOINT_NAME}"
      SKIP_TO="wait_endpoint"
      ;;
    Failed)
      # SKIP_TO stays empty, so step 1 creates new resources.
      echo "⚠️ Previous endpoint failed: ${ENDPOINT_NAME}"
      echo " Creating a new deployment. Clean up the failed endpoint with:"
      echo " ./do/clean endpoint"
      echo ""
      ;;
    "")
      echo " Previous endpoint not found (may have been cleaned up). Creating new deployment."
      ;;
    *)
      echo " Endpoint in unexpected state: ${EP_STATUS}. Creating new deployment."
      ;;
  esac
fi
|
|
332
|
+
|
|
333
|
+
# ============================================================
# Step 1: Create endpoint configuration and endpoint (skip if resuming)
# ============================================================
# Runs only when no resume point was chosen (SKIP_TO is empty).
#  - External endpoint: it must exist and be InService (otherwise exit 4);
#    the run then jumps straight to IC creation.
#  - Otherwise: a timestamped endpoint name is generated and saved, the
#    endpoint config is created through the shared helper, the endpoint
#    creation is started, and both resources are recorded in the manifest.
if [ -z "${SKIP_TO}" ] && [ "${ENDPOINT_EXTERNAL:-false}" = "true" ]; then
  echo "🔗 Using external endpoint: ${ENDPOINT_NAME}"
  echo " Validating endpoint status..."

  EP_STATUS=$(_get_endpoint_status "${ENDPOINT_NAME}")

  case "${EP_STATUS}" in
    InService)
      ;;
    "")
      echo "❌ External endpoint not found: ${ENDPOINT_NAME}"
      echo " The endpoint may have been deleted. Update ENDPOINT_NAME in do/config"
      echo " or remove ENDPOINT_EXTERNAL=true to create a new endpoint."
      exit 4
      ;;
    *)
      echo "❌ External endpoint not InService: ${ENDPOINT_NAME} (status: ${EP_STATUS})"
      echo " The endpoint must be InService before attaching inference components."
      echo " Wait for the endpoint to become InService, or update do/config."
      exit 4
      ;;
  esac

  echo "✅ External endpoint is InService: ${ENDPOINT_NAME}"
  # Skip directly to IC creation — no endpoint config, no endpoint creation, no wait
  SKIP_TO="create_ic"
elif [ -z "${SKIP_TO}" ]; then
  TIMESTAMP=$(date +%s)
  ENDPOINT_NAME="${PROJECT_NAME}-endpoint-${TIMESTAMP}"

  _update_config_var "ENDPOINT_NAME" "${ENDPOINT_NAME}"

  # Create endpoint configuration via shared helper
  create_endpoint_config

  _update_config_var "ENDPOINT_CONFIG_NAME" "${ENDPOINT_CONFIG_NAME}"

  # Record endpoint config in manifest (non-blocking: failures are ignored)
  ENDPOINT_CONFIG_ARN="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:endpoint-config/${ENDPOINT_CONFIG_NAME}"
  ./do/manifest add \
    --type sagemaker-endpoint-config \
    --id "${ENDPOINT_CONFIG_ARN}" \
    --project "${PROJECT_NAME}" \
    --meta "{\"endpointConfigName\":\"${ENDPOINT_CONFIG_NAME}\",\"instanceType\":\"${INSTANCE_TYPE}\",\"region\":\"${AWS_REGION}\"}" \
    2>/dev/null || true

  # Step 2: Create endpoint
  echo "🚀 Creating endpoint: ${ENDPOINT_NAME}"
  aws sagemaker create-endpoint \
    --endpoint-name "${ENDPOINT_NAME}" \
    --endpoint-config-name "${ENDPOINT_CONFIG_NAME}" \
    --region "${AWS_REGION}" || {
    echo "❌ Failed to create endpoint"
    echo " Check that:"
    echo " • Your IAM credentials have sagemaker:CreateEndpoint permission"
    echo " • You have sufficient service quota in region: ${AWS_REGION}"
    exit 4
  }

  echo "✅ Endpoint creation initiated: ${ENDPOINT_NAME}"

  # Record endpoint in manifest (non-blocking: failures are ignored)
  ENDPOINT_ARN="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:endpoint/${ENDPOINT_NAME}"
  ./do/manifest add \
    --type sagemaker-endpoint \
    --id "${ENDPOINT_ARN}" \
    --project "${PROJECT_NAME}" \
    --meta "{\"endpointName\":\"${ENDPOINT_NAME}\",\"instanceType\":\"${INSTANCE_TYPE}\",\"region\":\"${AWS_REGION}\"}" \
    2>/dev/null || true
fi
|
|
407
|
+
|
|
408
|
+
# ============================================================
# Wait for endpoint (skip if already InService or external)
# ============================================================
# Only a fresh run (SKIP_TO empty) or a resume at "wait_endpoint" has an
# endpoint that may still be coming up; every other resume point skips this.
case "${SKIP_TO}" in
  "" | wait_endpoint)
    echo "⏳ Waiting for endpoint to reach InService status..."
    echo " This may take a few minutes..."
    echo " If this times out, re-run ./do/deploy to resume."

    wait_endpoint "${ENDPOINT_NAME}"

    echo "✅ Endpoint is InService: ${ENDPOINT_NAME}"
    ;;
esac
|
|
420
|
+
|
|
421
|
+
# ============================================================
|
|
422
|
+
# Step 3: Deploy inference components (skip if resuming from wait_ic)
|
|
423
|
+
# ============================================================
|
|
424
|
+
if [ -z "${SKIP_TO}" ] || [ "${SKIP_TO}" = "create_ic" ] || [ "${SKIP_TO}" = "wait_endpoint" ]; then
|
|
425
|
+
|
|
426
|
+
if [ -d "${SCRIPT_DIR}/ic" ]; then
|
|
427
|
+
# _check_gpu_capacity
# Best-effort capacity guardrail: sums IC_GPU_COUNT across all do/ic/*.conf
# and compares the total against the known GPU count of INSTANCE_TYPE.
# It only ever prints a warning; it never fails the deploy.
#
# Globals:  INSTANCE_TYPE (read, may be unset), SCRIPT_DIR (read)
# Outputs:  a warning on stdout when the requested total exceeds capacity
# Returns:  0 always
#
# The check is skipped when INSTANCE_TYPE is empty (external endpoints) or
# not in the table below. A config with a missing, empty or non-numeric
# IC_GPU_COUNT is counted as 1 GPU; if the line appears more than once the
# last one is used.
_check_gpu_capacity() {
  # Skip check if no INSTANCE_TYPE (external endpoints)
  if [ -z "${INSTANCE_TYPE:-}" ]; then
    return 0
  fi

  # NOTE: Only do/ic/*.conf files are counted. Adapter ICs (do/adapters/*.conf)
  # share the base IC's GPU resources and have no ComputeResourceRequirements,
  # so they are intentionally excluded from this capacity check.
  #
  # Hardcoded GPU counts for common SageMaker GPU instance types
  local instance_gpus=""
  case "${INSTANCE_TYPE}" in
    ml.g4dn.xlarge | \
    ml.g5.xlarge | ml.g5.2xlarge | ml.g5.4xlarge | ml.g5.8xlarge | \
    ml.g6.xlarge | \
    ml.g6e.xlarge | ml.g6e.2xlarge | ml.g6e.4xlarge | ml.g6e.8xlarge | \
    ml.g7e.xlarge | ml.g7e.2xlarge | ml.g7e.4xlarge | ml.g7e.8xlarge | \
    ml.p3.2xlarge)
      instance_gpus=1
      ;;
    ml.g4dn.12xlarge | ml.g5.12xlarge | ml.g6.12xlarge | \
    ml.g6e.12xlarge | ml.g7e.12xlarge | ml.p3.8xlarge)
      instance_gpus=4
      ;;
    ml.g5.48xlarge | ml.g6.48xlarge | ml.g6e.48xlarge | ml.g7e.48xlarge | \
    ml.p3.16xlarge | ml.p4d.24xlarge | ml.p4de.24xlarge | ml.p5.48xlarge)
      instance_gpus=8
      ;;
    *)
      instance_gpus=""
      ;;
  esac

  # Skip check if instance type not in map
  if [ -z "${instance_gpus}" ]; then
    return 0
  fi

  # Sum IC_GPU_COUNT across all IC config files
  local total_gpu_requested=0
  local conf ic_gpus
  for conf in "${SCRIPT_DIR}"/ic/*.conf; do
    [ -f "${conf}" ] || continue

    # Last matching line wins; quotes and stray whitespace are dropped.
    ic_gpus=$(sed -n 's/^export IC_GPU_COUNT=//p' "${conf}" 2>/dev/null \
      | tail -n 1 \
      | tr -d "\"' \t\r") || ic_gpus=""

    # Anything that is not a plain non-negative integer would abort the
    # arithmetic below, so it falls back to the same default as a missing
    # value.
    case "${ic_gpus}" in
      '' | *[!0-9]*) ic_gpus=1 ;;
    esac

    # 10# forces base 10 so a value such as "08" is not parsed as octal.
    total_gpu_requested=$(( total_gpu_requested + 10#${ic_gpus} ))
  done

  if [ "${total_gpu_requested}" -gt "${instance_gpus}" ]; then
    echo ""
    echo "⚠️ GPU capacity warning: ICs request ${total_gpu_requested} GPUs total, but ${INSTANCE_TYPE} has ${instance_gpus} GPUs."
    echo " SageMaker will likely reject IC creation if capacity is exceeded."
    echo " Consider reducing IC_GPU_COUNT values or using a larger instance type."
    echo ""
  fi
}
|
|
503
|
+
|
|
504
|
+
# Run capacity guardrail before deploying ICs
|
|
505
|
+
_check_gpu_capacity
|
|
506
|
+
|
|
507
|
+
# _delete_and_wait_ic <ic_name>
# Requests deletion of an inference component, then polls its status until
# the status lookup comes back empty, so a recreate does not collide with a
# component that is still being torn down.
#
# Arguments: $1 - inference component name
# Globals:   AWS_REGION (read)
# Returns:   0 in every case. A failed delete call is taken to mean the IC
#            is already gone, and a poll that exceeds the 10-minute budget
#            only prints a warning before letting the caller continue.
_delete_and_wait_ic() {
  local target_ic="$1"
  local max_wait_secs=600 # 10 minutes max wait for deletion

  echo "🗑️ Deleting inference component: ${target_ic}"
  aws sagemaker delete-inference-component \
    --inference-component-name "${target_ic}" \
    --region "${AWS_REGION}" 2>/dev/null || {
    echo " ⚠️ Delete call failed (IC may already be gone). Continuing..."
    return 0
  }

  echo " Waiting for deletion to complete..."
  local started_at
  started_at=$(date +%s)

  local current_status waited
  while true; do
    current_status=$(_get_ic_status "${target_ic}")

    # An empty status means the IC can no longer be found.
    if [ -z "${current_status}" ]; then
      echo " ✅ Inference component deleted: ${target_ic}"
      break
    fi

    waited=$(( $(date +%s) - started_at ))
    if [ "${waited}" -ge "${max_wait_secs}" ]; then
      echo " ⚠️ Deletion timed out after ${max_wait_secs}s. IC status: ${current_status}"
      echo " Proceeding anyway — SageMaker may reject the new IC if name conflicts."
      break
    fi

    echo " $(date +%H:%M:%S) Deleting... (${current_status}, ${waited}s elapsed)"
    sleep 15
  done
}
|
|
546
|
+
|
|
547
|
+
# _deploy_single_ic <conf_file>
# Deploys the inference component described by <conf_file>, picking up from
# whatever state a previous run recorded in that file:
# - FORCE_IC is true and an IC is recorded → delete it, clear the recorded
#   state, then create a fresh IC
# - recorded IC is InService → skip
# - recorded IC is Creating → wait for it
# - recorded IC is Failed, missing or in any other state → create a new IC
# - nothing recorded → create a new IC
#
# Arguments: $1 - path to the IC config file (do/ic/<name>.conf)
# Globals:   reads FORCE_IC, AWS_REGION, AWS_ACCOUNT_ID, PROJECT_NAME,
#            ENDPOINT_NAME, INSTANCE_TYPE; IC_DEPLOYED_NAME holds the IC name
#            afterwards (on the create paths it is presumably set by
#            create_inference_component — confirm in lib/inference-component.sh)
# Returns:   0 once the IC is InService, otherwise the non-zero status of the
#            helper that failed.
#
# Failures are returned explicitly rather than left to `set -e`: the multi-IC
# loop calls this function as `if ! _deploy_single_ic ...`, and bash ignores
# errexit for everything executed inside such a condition. Without the
# explicit returns a failed helper would be skipped over and the function
# would still end with status 0.
_deploy_single_ic() {
  local ic_conf="$1"
  local ic_basename
  ic_basename=$(basename "${ic_conf}" .conf)

  # Read IC_DEPLOYED_NAME out of the file instead of sourcing it, so the
  # IC's other settings do not leak into this shell. The last matching line
  # wins and the surrounding double quotes, if present, are dropped.
  local existing_ic_name=""
  existing_ic_name=$(sed -n 's/^export IC_DEPLOYED_NAME=//p' "${ic_conf}" 2>/dev/null | tail -n 1) \
    || existing_ic_name=""
  existing_ic_name=${existing_ic_name#\"}
  existing_ic_name=${existing_ic_name%\"}

  # --force-ic: delete existing IC before recreating
  if [ "${FORCE_IC}" = true ] && [ -n "${existing_ic_name}" ]; then
    echo "🔄 --force-ic: recreating IC '${ic_basename}'"
    _delete_and_wait_ic "${existing_ic_name}"

    # Clear deployed state from config before recreating
    _update_config_var "IC_DEPLOYED_NAME" "" "${ic_conf}" || return $?
    _update_config_var "IC_DEPLOYED_AT" "" "${ic_conf}" || return $?
    existing_ic_name=""
  fi

  if [ -n "${existing_ic_name}" ]; then
    # IC was previously deployed — check its current status
    local ic_status
    ic_status=$(_get_ic_status "${existing_ic_name}")

    case "${ic_status}" in
      InService)
        echo "✅ IC '${ic_basename}' already InService: ${existing_ic_name} — skipping"
        IC_DEPLOYED_NAME="${existing_ic_name}"
        return 0
        ;;
      Creating)
        echo "⏳ IC '${ic_basename}' is still Creating: ${existing_ic_name} — waiting..."
        IC_DEPLOYED_NAME="${existing_ic_name}"
        wait_ic "${IC_DEPLOYED_NAME}" || return $?
        echo "✅ Inference component is InService: ${IC_DEPLOYED_NAME}"
        return 0
        ;;
      Failed)
        echo "⚠️ IC '${ic_basename}' previously Failed: ${existing_ic_name} — recreating..."
        create_inference_component "${ic_conf}" || return $?
        ;;
      *)
        echo " IC '${ic_basename}' has unknown/missing status for ${existing_ic_name} — creating new..."
        create_inference_component "${ic_conf}" || return $?
        ;;
    esac
  else
    # Nothing recorded (or just cleared by --force-ic) — create new IC
    create_inference_component "${ic_conf}" || return $?
  fi

  echo "⏳ Waiting for inference component to reach InService status..."
  echo " This may take 5-10 minutes..."

  wait_ic "${IC_DEPLOYED_NAME}" || return $?

  echo "✅ Inference component is InService: ${IC_DEPLOYED_NAME}"

  # Record inference component in manifest (non-blocking: failures are ignored)
  local ic_arn="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${IC_DEPLOYED_NAME}"
  ./do/manifest add \
    --type sagemaker-inference-component \
    --id "${ic_arn}" \
    --project "${PROJECT_NAME}" \
    --meta "{\"inferenceComponentName\":\"${IC_DEPLOYED_NAME}\",\"endpointName\":\"${ENDPOINT_NAME}\",\"instanceType\":\"${INSTANCE_TYPE:-external}\",\"region\":\"${AWS_REGION}\"}" \
    2>/dev/null || true
}
|
|
629
|
+
|
|
630
|
+
# Pick the deployment scope: every IC config under do/ic/, or only the one
# named by IC_TARGET.
if [ -z "${IC_TARGET}" ]; then
  # Multi-IC path: iterate all IC config files (alphabetical order).
  # The loop stops at the first IC that fails; a summary is printed either
  # way and a failure ends the run with exit code 4.
  IC_SUMMARY=""
  IC_DEPLOY_FAILED=false

  for conf in "${SCRIPT_DIR}"/ic/*.conf; do
    # An unmatched glob stays literal; skip it.
    if [ ! -f "${conf}" ]; then
      continue
    fi

    local_ic_basename=$(basename "${conf}" .conf)
    echo ""
    echo "── Deploying IC: ${local_ic_basename} ──"

    if _deploy_single_ic "${conf}"; then
      IC_SUMMARY="${IC_SUMMARY} ${local_ic_basename}: ${IC_DEPLOYED_NAME} [InService]\n"
      continue
    fi

    echo "❌ IC '${local_ic_basename}' failed to deploy. Stopping."
    IC_SUMMARY="${IC_SUMMARY} ${local_ic_basename}: FAILED\n"
    IC_DEPLOY_FAILED=true
    break
  done

  # Print summary (entries are joined with literal "\n", hence echo -e)
  echo ""
  echo "📋 IC Deployment Summary:"
  echo -e "${IC_SUMMARY}"

  if [ "${IC_DEPLOY_FAILED}" = true ]; then
    echo "❌ Deployment stopped due to IC failure. Fix the issue and re-run ./do/deploy to resume."
    exit 4
  fi
else
  # Single IC path: deploy only the named IC
  echo ""
  echo "── Deploying IC: ${IC_TARGET} ──"
  _deploy_single_ic "${SCRIPT_DIR}/ic/${IC_TARGET}.conf"
fi
|
|
666
|
+
else
|
|
667
|
+
# Legacy single-IC path: no do/ic/ directory
|
|
668
|
+
create_inference_component_legacy
|
|
669
|
+
|
|
670
|
+
echo "⏳ Waiting for inference component to reach InService status..."
|
|
671
|
+
echo " This may take 5-10 minutes..."
|
|
672
|
+
echo " If this times out, re-run ./do/deploy to resume."
|
|
673
|
+
|
|
674
|
+
wait_ic "${IC_DEPLOYED_NAME}"
|
|
675
|
+
|
|
676
|
+
echo "✅ Inference component is InService: ${IC_DEPLOYED_NAME}"
|
|
677
|
+
|
|
678
|
+
# Record inference component in manifest (non-blocking)
|
|
679
|
+
IC_ARN="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:inference-component/${IC_DEPLOYED_NAME}"
|
|
680
|
+
./do/manifest add \
|
|
681
|
+
--type sagemaker-inference-component \
|
|
682
|
+
--id "${IC_ARN}" \
|
|
683
|
+
--project "${PROJECT_NAME}" \
|
|
684
|
+
--meta "{\"inferenceComponentName\":\"${IC_DEPLOYED_NAME}\",\"endpointName\":\"${ENDPOINT_NAME}\",\"instanceType\":\"${INSTANCE_TYPE:-external}\",\"region\":\"${AWS_REGION}\"}" \
|
|
685
|
+
2>/dev/null || true
|
|
686
|
+
fi
|
|
687
|
+
|
|
688
|
+
elif [ "${SKIP_TO}" = "wait_ic" ]; then
|
|
689
|
+
# Resuming: just wait for the IC that was already being created
|
|
690
|
+
echo "⏳ Waiting for inference component to reach InService status..."
|
|
691
|
+
echo " This may take 5-10 minutes..."
|
|
692
|
+
echo " If this times out, re-run ./do/deploy to resume."
|
|
693
|
+
|
|
694
|
+
wait_ic "${IC_DEPLOYED_NAME}"
|
|
695
|
+
|
|
696
|
+
echo "✅ Inference component is InService: ${IC_DEPLOYED_NAME}"
|
|
697
|
+
fi
|
|
698
|
+
|
|
699
|
+
echo "✅ Deployment complete!"
|
|
700
|
+
echo ""
|
|
701
|
+
echo "📋 Deployment Details:"
|
|
702
|
+
echo " Endpoint: ${ENDPOINT_NAME}"
|
|
703
|
+
if [ "${ENDPOINT_EXTERNAL:-false}" = "true" ]; then
|
|
704
|
+
echo " Endpoint Config: (external — not managed by this project)"
|
|
705
|
+
echo " Region: ${AWS_REGION}"
|
|
706
|
+
else
|
|
707
|
+
echo " Endpoint Config: ${ENDPOINT_CONFIG_NAME:-N/A}"
|
|
708
|
+
echo " Region: ${AWS_REGION}"
|
|
709
|
+
echo " Instance Type: ${INSTANCE_TYPE}"
|
|
710
|
+
fi
|
|
711
|
+
echo " Image: ${ECR_REPOSITORY}:${IMAGE_TAG}"
|
|
712
|
+
echo ""
|
|
713
|
+
echo "📋 What's next?"
|
|
714
|
+
echo " • Test your endpoint: ./do/test"
|
|
715
|
+
<% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
|
|
716
|
+
echo " • Benchmark performance: ./do/benchmark"
|
|
717
|
+
<% } %>
|
|
718
|
+
<% if (typeof enableLora !== 'undefined' && enableLora) { %>
|
|
719
|
+
echo " • Add a LoRA adapter: ./do/adapter add <name> --weights s3://..."
|
|
720
|
+
<% } %>
|
|
721
|
+
echo " • View endpoint status: ./do/status"
|
|
722
|
+
echo " • Register this deployment: ./do/register"
|
|
723
|
+
echo " • View logs: ./do/logs"
|
|
724
|
+
<% if (!(typeof existingEndpointName !== 'undefined' && existingEndpointName)) { %>
|
|
725
|
+
echo " • Clean up when done: ./do/clean endpoint"
|
|
726
|
+
<% } %>
|