@aws/ml-container-creator 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. package/bin/cli.js +31 -137
  2. package/config/parameter-schema-v2.json +2065 -0
  3. package/package.json +6 -3
  4. package/servers/lib/catalogs/jumpstart-public.json +101 -16
  5. package/servers/lib/catalogs/models.json +182 -26
  6. package/src/app.js +6 -389
  7. package/src/lib/bootstrap-command-handler.js +75 -1078
  8. package/src/lib/bootstrap-profile-manager.js +634 -0
  9. package/src/lib/bootstrap-provisioners.js +421 -0
  10. package/src/lib/config-loader.js +405 -0
  11. package/src/lib/config-manager.js +59 -1668
  12. package/src/lib/config-mcp-client.js +118 -0
  13. package/src/lib/config-validator.js +634 -0
  14. package/src/lib/cuda-resolver.js +140 -0
  15. package/src/lib/e2e-catalog-validator.js +251 -3
  16. package/src/lib/e2e-ci-recorder.js +103 -0
  17. package/src/lib/generated/cli-options.js +471 -0
  18. package/src/lib/generated/parameter-matrix.js +671 -0
  19. package/src/lib/generated/validation-rules.js +202 -0
  20. package/src/lib/marketplace-flow.js +276 -0
  21. package/src/lib/mcp-query-runner.js +768 -0
  22. package/src/lib/parameter-schema-validator.js +62 -18
  23. package/src/lib/prompt-runner.js +41 -1504
  24. package/src/lib/prompts/feature-prompts.js +172 -0
  25. package/src/lib/prompts/index.js +48 -0
  26. package/src/lib/prompts/infrastructure-prompts.js +690 -0
  27. package/src/lib/prompts/model-prompts.js +552 -0
  28. package/src/lib/prompts/project-prompts.js +70 -0
  29. package/src/lib/prompts.js +2 -1446
  30. package/src/lib/registry-command-handler.js +135 -3
  31. package/src/lib/secrets-prompt-runner.js +251 -0
  32. package/src/lib/template-variable-resolver.js +398 -0
  33. package/templates/code/serve +5 -134
  34. package/templates/code/serve.d/lmi.ejs +19 -0
  35. package/templates/code/serve.d/sglang.ejs +47 -0
  36. package/templates/code/serve.d/tensorrt-llm.ejs +53 -0
  37. package/templates/code/serve.d/vllm.ejs +48 -0
  38. package/templates/do/clean +1 -1387
  39. package/templates/do/clean.d/async-inference.ejs +508 -0
  40. package/templates/do/clean.d/batch-transform.ejs +512 -0
  41. package/templates/do/clean.d/hyperpod-eks.ejs +481 -0
  42. package/templates/do/clean.d/managed-inference.ejs +1043 -0
  43. package/templates/do/deploy +1 -1766
  44. package/templates/do/deploy.d/async-inference.ejs +501 -0
  45. package/templates/do/deploy.d/batch-transform.ejs +529 -0
  46. package/templates/do/deploy.d/hyperpod-eks.ejs +339 -0
  47. package/templates/do/deploy.d/managed-inference.ejs +726 -0
  48. package/config/parameter-schema.json +0 -88
@@ -0,0 +1,501 @@
1
+ #!/bin/bash
2
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
# Strict mode: abort on errors, on unset variables, and on failures in any
# pipeline stage.
set -e
set -u
set -o pipefail

# ------------------------------------------------------------
# Command-line flags
#   FORCE_NEW  "true" after --force; the idempotency check further down
#              skips resuming a previous deployment when it is set.
#   FORCE_IC   "true" after --force-ic. Nothing in this async flow reads it
#              (async inference has no inference components); it is still
#              set in case a sourced helper inspects it.
#   IC_TARGET  initialised empty and never assigned in this script.
# ------------------------------------------------------------
FORCE_NEW=false
FORCE_IC=false
IC_TARGET=""

# Print the usage text to stdout.
_print_deploy_usage() {
  echo "Usage: ./do/deploy [--force] [--force-ic]"
  echo ""
  echo "Options:"
  echo " --force Create a new endpoint, even if one already exists."
  echo " --force-ic Accepted for compatibility; ignored for async inference (no inference components)."
  echo ""
  echo "Without flags, deploy resumes from the last run."
}

# Parse the given arguments into FORCE_NEW / FORCE_IC.
# Exits 0 after printing help, exits 1 on an unknown option.
_parse_deploy_flags() {
  while [ $# -gt 0 ]; do
    case "$1" in
      --force)
        FORCE_NEW=true
        shift
        ;;
      --force-ic)
        FORCE_IC=true
        # Tell the user instead of silently ignoring the flag.
        echo "ℹ️ --force-ic has no effect: async inference does not use inference components."
        shift
        ;;
      --help|-h)
        _print_deploy_usage
        exit 0
        ;;
      *)
        echo "❌ Unknown option: $1"
        echo " Run ./do/deploy --help for usage."
        exit 1
        ;;
    esac
  done
}

_parse_deploy_flags "$@"
# Drop the consumed arguments so files sourced later see no positional
# parameters, exactly as when the loop ran at top level.
shift "$#"
37
+
# Source configuration
# SCRIPT_DIR is the absolute directory of this script; do/config is expected
# to define PROJECT_NAME, AWS_REGION and the other variables echoed below.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/config"

echo "🚀 Deploying to AWS"
echo " Project: ${PROJECT_NAME}"
echo " Deployment config: ${DEPLOYMENT_CONFIG}"
echo " Region: ${AWS_REGION}"
echo " Build target: ${BUILD_TARGET}"
echo " Deployment target: ${DEPLOYMENT_TARGET}"
echo " Instance type: ${INSTANCE_TYPE}"
echo " S3 output: ${ASYNC_S3_OUTPUT_PATH}"
echo " SNS success: ${ASYNC_SNS_SUCCESS_TOPIC}"
echo " SNS error: ${ASYNC_SNS_ERROR_TOPIC}"
<% if (asyncMaxConcurrentInvocations) { %>
echo " Max concurrent: ${ASYNC_MAX_CONCURRENT_INVOCATIONS}"
<% } %>

# Check AWS credentials
# One STS call both validates the credentials and captures the account ID, so
# a failure always takes the friendly error path below instead of a bare
# `set -e` abort on a second, unguarded call.
echo "🔍 Validating AWS credentials..."
if ! AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text 2> /dev/null); then
  echo "❌ AWS credentials not configured"
  echo " Run: aws configure"
  echo " Or set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables"
  exit 4
fi

echo "✅ AWS credentials validated (Account: ${AWS_ACCOUNT_ID})"

# Construct ECR repository URL
ECR_REPOSITORY="${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/${ECR_REPOSITORY_NAME}"
70
+
# ============================================================
# Shared: Verify ECR image exists
# ============================================================
# The deploy always uses the "<project>-latest" tag; name it once up front.
IMAGE_TAG="${PROJECT_NAME}-latest"

echo "🔍 Verifying ECR image exists..."
if ! aws ecr describe-images \
  --repository-name "${ECR_REPOSITORY_NAME}" \
  --image-ids "imageTag=${IMAGE_TAG}" \
  --region "${AWS_REGION}" > /dev/null 2>&1; then

  # Image missing (or not readable): point the user at the build step.
  printf '%s\n' \
    "❌ ECR image not found: ${ECR_REPOSITORY}:${IMAGE_TAG}" \
    "" \
    "Please build and push your image first:" \
    " ./do/submit" \
    "" \
    "After the build completes successfully, run this deploy script again."
  exit 4
fi

echo "✅ ECR image found: ${ECR_REPOSITORY}:${IMAGE_TAG}"
91
+
# ============================================================
# Shared: Resolve secrets for container environment
# ============================================================
# CONTAINER_ENV_JSON accumulates comma-separated "KEY":"value" pairs (without
# the surrounding braces); model creation wraps it in {"Environment":{...}}.
CONTAINER_ENV_JSON=""

# Escape a string for use inside a JSON double-quoted value.
# Handles backslash and double quote; command substitution has already
# stripped trailing newlines from resolved secrets.
# Arguments: $1 - raw value
# Outputs:   escaped value on stdout
_json_escape() {
  local raw=$1
  raw=${raw//\\/\\\\}
  raw=${raw//\"/\\\"}
  printf '%s' "${raw}"
}

# Append one "KEY":"value" pair to CONTAINER_ENV_JSON, inserting a comma when
# the list is non-empty.
# Arguments: $1 - environment variable name, $2 - raw value
_add_container_env() {
  local entry
  entry="\"$1\":\"$(_json_escape "$2")\""
  if [ -n "${CONTAINER_ENV_JSON}" ]; then
    CONTAINER_ENV_JSON="${CONTAINER_ENV_JSON},${entry}"
  else
    CONTAINER_ENV_JSON="${entry}"
  fi
}

# HuggingFace token: a Secrets Manager ARN takes precedence over a literal.
if [ -n "${HF_TOKEN_ARN:-}" ]; then
  echo "🔐 Resolving HuggingFace token from Secrets Manager..."
  RESOLVED_HF_TOKEN=$(aws secretsmanager get-secret-value --secret-id "${HF_TOKEN_ARN}" --query SecretString --output text --region "${AWS_REGION}") || {
    echo "❌ Failed to resolve HuggingFace token from Secrets Manager"
    exit 3
  }
  _add_container_env "HF_TOKEN" "${RESOLVED_HF_TOKEN}"
elif [ -n "${HF_TOKEN:-}" ]; then
  _add_container_env "HF_TOKEN" "${HF_TOKEN}"
fi

# NGC API key: same precedence rule.
if [ -n "${NGC_API_KEY_ARN:-}" ]; then
  echo "🔐 Resolving NGC API key from Secrets Manager..."
  RESOLVED_NGC_KEY=$(aws secretsmanager get-secret-value --secret-id "${NGC_API_KEY_ARN}" --query SecretString --output text --region "${AWS_REGION}") || {
    echo "❌ Failed to resolve NGC API key from Secrets Manager"
    exit 3
  }
  _add_container_env "NGC_API_KEY" "${RESOLVED_NGC_KEY}"
elif [ -n "${NGC_API_KEY:-}" ]; then
  _add_container_env "NGC_API_KEY" "${NGC_API_KEY}"
fi
126
+
# ============================================================
# SageMaker Async Inference Deployment (Model-Based)
# SageMaker async inference does NOT support Inference Components.
# Flow: create-model → create-endpoint-config (with AsyncInferenceConfig) → create-endpoint
# ============================================================

# Source shared helpers
source "${SCRIPT_DIR}/lib/secrets.sh"
source "${SCRIPT_DIR}/lib/wait.sh"

# Resolve container secrets (HF_TOKEN, NGC_API_KEY)
# NOTE(review): the inline block earlier in this script already fills
# CONTAINER_ENV_JSON from the same variables — confirm whether resolve_secrets
# replaces or duplicates that work.
resolve_secrets

# Validate execution role ARN: without it nothing below can be created, so
# print the setup instructions and stop.
if [ -z "${ROLE_ARN:-}" ]; then
  printf '%s\n' \
    "❌ Execution role ARN not provided" \
    "" \
    "Usage:" \
    " export ROLE_ARN=arn:aws:iam::ACCOUNT_ID:role/YOUR_ROLE" \
    " ./do/deploy" \
    "" \
    "Or set ROLE_ARN in do/config" \
    "" \
    "The execution role must have permissions for:" \
    " • SageMaker model and endpoint management" \
    " • ECR image access" \
    " • S3 write access for async output path: ${ASYNC_S3_OUTPUT_PATH}" \
    " • SNS publish permissions (optional, for notifications)" \
    " • CloudWatch Logs"
  exit 3
fi

echo " Using execution role: ${ROLE_ARN}"
160
+
# ============================================================
# Bootstrap async infrastructure (S3 bucket + SNS topics)
# ============================================================

# Extract bucket name from S3 output path: drop the "s3://" scheme, then keep
# everything before the first "/".
ASYNC_S3_BUCKET="${ASYNC_S3_OUTPUT_PATH/s3:\/\//}"
ASYNC_S3_BUCKET="${ASYNC_S3_BUCKET%%/*}"

<% if (!asyncS3OutputPath) { %>
# Bootstrap default S3 bucket (check-and-create)
echo "🔍 Checking if S3 bucket exists: ${ASYNC_S3_BUCKET}"
if aws s3api head-bucket --bucket "${ASYNC_S3_BUCKET}" --region "${AWS_REGION}" 2>/dev/null; then
  echo "✅ S3 bucket exists: ${ASYNC_S3_BUCKET}"
else
  echo "📦 Creating S3 bucket: ${ASYNC_S3_BUCKET}"

  # us-east-1 is created without a LocationConstraint; every other region
  # passes one.
  create_bucket_args=(--bucket "${ASYNC_S3_BUCKET}" --region "${AWS_REGION}")
  if [ "${AWS_REGION}" != "us-east-1" ]; then
    create_bucket_args+=(--create-bucket-configuration "LocationConstraint=${AWS_REGION}")
  fi

  if ! aws s3api create-bucket "${create_bucket_args[@]}"; then
    printf '%s\n' \
      "❌ Failed to create S3 bucket: ${ASYNC_S3_BUCKET}" \
      "" \
      " Check that:" \
      " • Your IAM credentials have s3:CreateBucket permission" \
      " • The bucket name is not already taken globally"
    exit 4
  fi
  echo "✅ S3 bucket created: ${ASYNC_S3_BUCKET}"
fi
<% } else { %>
# Custom S3 output path provided — skip bucket creation
echo "✅ Using custom S3 output path: ${ASYNC_S3_OUTPUT_PATH}"
<% } %>
205
+
# Extract topic name from SNS success topic ARN (the text after the last ":")
ASYNC_SNS_SUCCESS_TOPIC_NAME="${ASYNC_SNS_SUCCESS_TOPIC##*:}"

<% if (!asyncSnsSuccessTopic) { %>
# Bootstrap default SNS success topic (check-and-create)
echo "🔍 Checking if SNS success topic exists: ${ASYNC_SNS_SUCCESS_TOPIC_NAME}"
# Silence stdout as well as stderr: on success the CLI prints the topic's
# attributes as JSON, which is noise here.
if ! aws sns get-topic-attributes --topic-arn "${ASYNC_SNS_SUCCESS_TOPIC}" --region "${AWS_REGION}" > /dev/null 2>&1; then
  echo "📦 Creating SNS success topic: ${ASYNC_SNS_SUCCESS_TOPIC_NAME}"
  if ! aws sns create-topic \
    --name "${ASYNC_SNS_SUCCESS_TOPIC_NAME}" \
    --region "${AWS_REGION}" > /dev/null; then
    echo "❌ Failed to create SNS success topic: ${ASYNC_SNS_SUCCESS_TOPIC_NAME}"
    echo ""
    echo " Check that:"
    echo " • Your IAM credentials have sns:CreateTopic permission"
    exit 4
  fi
  echo "✅ SNS success topic created: ${ASYNC_SNS_SUCCESS_TOPIC_NAME}"
else
  echo "✅ SNS success topic exists: ${ASYNC_SNS_SUCCESS_TOPIC_NAME}"
fi
<% } else { %>
# Custom SNS success topic ARN provided — skip topic creation
echo "✅ Using custom SNS success topic: ${ASYNC_SNS_SUCCESS_TOPIC}"
<% } %>

# Record SNS success topic in manifest (non-blocking).
# The manifest tool is addressed through SCRIPT_DIR so the record is not
# silently lost when deploy is run from outside the project root; failures
# stay deliberately ignored.
"${SCRIPT_DIR}/manifest" add \
  --type sns-topic \
  --id "${ASYNC_SNS_SUCCESS_TOPIC}" \
  --project "${PROJECT_NAME}" \
  --meta "{\"topicName\":\"${ASYNC_SNS_SUCCESS_TOPIC_NAME}\",\"purpose\":\"async-success\",\"region\":\"${AWS_REGION}\"}" \
  2>/dev/null || true
250
+
# Extract topic name from SNS error topic ARN (the text after the last ":")
ASYNC_SNS_ERROR_TOPIC_NAME="${ASYNC_SNS_ERROR_TOPIC##*:}"

<% if (!asyncSnsErrorTopic) { %>
# Bootstrap default SNS error topic (check-and-create)
echo "🔍 Checking if SNS error topic exists: ${ASYNC_SNS_ERROR_TOPIC_NAME}"
# Silence stdout as well as stderr: on success the CLI prints the topic's
# attributes as JSON, which is noise here.
if ! aws sns get-topic-attributes --topic-arn "${ASYNC_SNS_ERROR_TOPIC}" --region "${AWS_REGION}" > /dev/null 2>&1; then
  echo "📦 Creating SNS error topic: ${ASYNC_SNS_ERROR_TOPIC_NAME}"
  if ! aws sns create-topic \
    --name "${ASYNC_SNS_ERROR_TOPIC_NAME}" \
    --region "${AWS_REGION}" > /dev/null; then
    echo "❌ Failed to create SNS error topic: ${ASYNC_SNS_ERROR_TOPIC_NAME}"
    echo ""
    echo " Check that:"
    echo " • Your IAM credentials have sns:CreateTopic permission"
    exit 4
  fi
  echo "✅ SNS error topic created: ${ASYNC_SNS_ERROR_TOPIC_NAME}"
else
  echo "✅ SNS error topic exists: ${ASYNC_SNS_ERROR_TOPIC_NAME}"
fi
<% } else { %>
# Custom SNS error topic ARN provided — skip topic creation
echo "✅ Using custom SNS error topic: ${ASYNC_SNS_ERROR_TOPIC}"
<% } %>

# Record SNS error topic in manifest (non-blocking).
# The manifest tool is addressed through SCRIPT_DIR so the record is not
# silently lost when deploy is run from outside the project root; failures
# stay deliberately ignored.
"${SCRIPT_DIR}/manifest" add \
  --type sns-topic \
  --id "${ASYNC_SNS_ERROR_TOPIC}" \
  --project "${PROJECT_NAME}" \
  --meta "{\"topicName\":\"${ASYNC_SNS_ERROR_TOPIC_NAME}\",\"purpose\":\"async-error\",\"region\":\"${AWS_REGION}\"}" \
  2>/dev/null || true
295
+
# ============================================================
# Create async endpoint (classic model-based flow)
# SageMaker async inference does NOT support Inference Components.
# Flow: create-model → create-endpoint-config (with AsyncInferenceConfig) → create-endpoint
# ============================================================

# ============================================================
# Idempotency: check for existing deployment from a previous run
# ============================================================
# SKIP_TO stays empty for a fresh deployment; "wait_endpoint" means resource
# creation is skipped and the script only waits on the endpoint.
SKIP_TO=""

if [ "${FORCE_NEW}" = true ]; then
  echo "🔄 --force: ignoring previous deployment, creating new resources."
elif [ -n "${ENDPOINT_NAME:-}" ]; then
  echo "🔍 Checking for existing deployment: ${ENDPOINT_NAME}"

  # _get_endpoint_status comes from a sourced helper; an empty result is
  # treated below as "endpoint not found".
  EP_STATUS=$(_get_endpoint_status "${ENDPOINT_NAME}")

  if [ "${EP_STATUS}" = "InService" ]; then
    # Already live: report and stop successfully.
    printf '%s\n' \
      "✅ Async endpoint already InService: ${ENDPOINT_NAME}" \
      "" \
      "📋 Deployment is already live. Nothing to do." \
      " Endpoint: ${ENDPOINT_NAME}" \
      "" \
      "🧪 Test your async endpoint:" \
      " ./do/test" \
      "" \
      "🧹 Clean up when done:" \
      " ./do/clean endpoint"
    exit 0
  elif [ "${EP_STATUS}" = "Creating" ] || [ "${EP_STATUS}" = "Updating" ]; then
    # Still in progress: resume by waiting.
    echo "⏳ Endpoint still ${EP_STATUS}: ${ENDPOINT_NAME}"
    SKIP_TO="wait_endpoint"
  elif [ "${EP_STATUS}" = "Failed" ]; then
    printf '%s\n' \
      "⚠️ Previous endpoint failed: ${ENDPOINT_NAME}" \
      " Creating a new deployment. Clean up the failed endpoint with:" \
      " ./do/clean endpoint" \
      ""
  elif [ -z "${EP_STATUS}" ]; then
    echo " Previous endpoint not found (may have been cleaned up). Creating new deployment."
  else
    echo " Endpoint in unexpected state: ${EP_STATUS}. Creating new deployment."
  fi
fi
346
+
# ============================================================
# Create async resources (skip if resuming from wait)
# ============================================================
# Manifest records below are best-effort (errors ignored). The manifest tool
# is addressed through SCRIPT_DIR rather than ./do/manifest so records are not
# silently dropped when deploy is started from outside the project root.
if [ -z "${SKIP_TO}" ]; then
  # All three resource names share one epoch-seconds suffix.
  TIMESTAMP=$(date +%s)
  MODEL_NAME_SM="${PROJECT_NAME}-async-model-${TIMESTAMP}"
  ENDPOINT_CONFIG_NAME="${PROJECT_NAME}-async-epc-${TIMESTAMP}"
  ENDPOINT_NAME="${PROJECT_NAME}-async-ep-${TIMESTAMP}"

  # Persist the names before creating anything so a later run can find them.
  _update_config_var "ENDPOINT_NAME" "${ENDPOINT_NAME}"
  _update_config_var "ENDPOINT_CONFIG_NAME" "${ENDPOINT_CONFIG_NAME}"
  _update_config_var "SAGEMAKER_MODEL_NAME" "${MODEL_NAME_SM}"

  # Step 1: Create SageMaker model
  # Build primary container spec; Environment is only added when secrets
  # were resolved into CONTAINER_ENV_JSON.
  PRIMARY_CONTAINER="{\"Image\":\"${ECR_REPOSITORY}:${IMAGE_TAG}\""
  if [ -n "${CONTAINER_ENV_JSON}" ]; then
    PRIMARY_CONTAINER="${PRIMARY_CONTAINER},\"Environment\":{${CONTAINER_ENV_JSON}}"
  fi
  PRIMARY_CONTAINER="${PRIMARY_CONTAINER}}"

  echo "📦 Creating SageMaker model: ${MODEL_NAME_SM}"
  if ! aws sagemaker create-model \
    --model-name "${MODEL_NAME_SM}" \
    --primary-container "${PRIMARY_CONTAINER}" \
    --execution-role-arn "${ROLE_ARN}" \
    --region "${AWS_REGION}"; then

    echo "❌ Failed to create SageMaker model"
    echo " Check that:"
    echo " • The execution role ARN is valid"
    echo " • The ECR image exists and is accessible"
    echo " • The IAM role has ecr:GetDownloadUrlForLayer permission"
    exit 4
  fi

  echo "✅ SageMaker model created: ${MODEL_NAME_SM}"

  # Record model in manifest (non-blocking)
  MODEL_ARN="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:model/${MODEL_NAME_SM}"
  "${SCRIPT_DIR}/manifest" add \
    --type sagemaker-model \
    --id "${MODEL_ARN}" \
    --project "${PROJECT_NAME}" \
    --meta "{\"modelName\":\"${MODEL_NAME_SM}\",\"region\":\"${AWS_REGION}\"}" \
    2>/dev/null || true

  # Build production variant JSON (classic: includes ModelName, no execution-role-arn on endpoint config)
  VARIANT_JSON="[{\"VariantName\":\"AllTraffic\",\"ModelName\":\"${MODEL_NAME_SM}\",\"InstanceType\":\"${INSTANCE_TYPE}\",\"InitialInstanceCount\":1"

  # Append InferenceAmiVersion if configured
  if [ -n "${INFERENCE_AMI_VERSION:-}" ]; then
    VARIANT_JSON="${VARIANT_JSON},\"InferenceAmiVersion\":\"${INFERENCE_AMI_VERSION}\""
    echo " AMI version: ${INFERENCE_AMI_VERSION}"
  fi

  VARIANT_JSON="${VARIANT_JSON}}]"

  # Build AsyncInferenceConfig JSON
  ASYNC_CONFIG="{\"OutputConfig\":{\"S3OutputPath\":\"${ASYNC_S3_OUTPUT_PATH}\",\"NotificationConfig\":{\"SuccessTopic\":\"${ASYNC_SNS_SUCCESS_TOPIC}\",\"ErrorTopic\":\"${ASYNC_SNS_ERROR_TOPIC}\"}}"

  # ClientConfig is optional; the value is emitted unquoted, i.e. as a JSON
  # number.
  if [ -n "${ASYNC_MAX_CONCURRENT_INVOCATIONS:-}" ]; then
    ASYNC_CONFIG="${ASYNC_CONFIG},\"ClientConfig\":{\"MaxConcurrentInvocationsPerInstance\":${ASYNC_MAX_CONCURRENT_INVOCATIONS}}"
  fi

  ASYNC_CONFIG="${ASYNC_CONFIG}}"

  # Step 2: Create endpoint configuration with AsyncInferenceConfig (no --execution-role-arn)
  echo "⚙️ Creating async endpoint configuration: ${ENDPOINT_CONFIG_NAME}"
  if ! aws sagemaker create-endpoint-config \
    --endpoint-config-name "${ENDPOINT_CONFIG_NAME}" \
    --production-variants "${VARIANT_JSON}" \
    --async-inference-config "${ASYNC_CONFIG}" \
    --region "${AWS_REGION}"; then

    echo "❌ Failed to create async endpoint configuration"
    echo " Check that:"
    echo " • The S3 output path is accessible: ${ASYNC_S3_OUTPUT_PATH}"
    echo " • The IAM role has s3:PutObject permission on the output path"
    echo " • The instance type is valid: ${INSTANCE_TYPE}"
    echo " • The instance type is available in region: ${AWS_REGION}"
    echo " • You have sufficient service quota for the instance type"
    exit 4
  fi

  echo "✅ Async endpoint configuration created: ${ENDPOINT_CONFIG_NAME}"

  # Record endpoint config in manifest (non-blocking)
  ENDPOINT_CONFIG_ARN="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:endpoint-config/${ENDPOINT_CONFIG_NAME}"
  "${SCRIPT_DIR}/manifest" add \
    --type sagemaker-endpoint-config \
    --id "${ENDPOINT_CONFIG_ARN}" \
    --project "${PROJECT_NAME}" \
    --meta "{\"endpointConfigName\":\"${ENDPOINT_CONFIG_NAME}\",\"instanceType\":\"${INSTANCE_TYPE}\",\"region\":\"${AWS_REGION}\"}" \
    2>/dev/null || true

  # Step 3: Create endpoint
  echo "🚀 Creating async endpoint: ${ENDPOINT_NAME}"
  if ! aws sagemaker create-endpoint \
    --endpoint-name "${ENDPOINT_NAME}" \
    --endpoint-config-name "${ENDPOINT_CONFIG_NAME}" \
    --region "${AWS_REGION}"; then

    echo "❌ Failed to create async endpoint"
    echo " Check that:"
    echo " • Your IAM credentials have sagemaker:CreateEndpoint permission"
    echo " • You have sufficient service quota in region: ${AWS_REGION}"
    exit 4
  fi

  echo "✅ Async endpoint creation initiated: ${ENDPOINT_NAME}"

  # Record endpoint in manifest (non-blocking)
  ENDPOINT_ARN="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:endpoint/${ENDPOINT_NAME}"
  "${SCRIPT_DIR}/manifest" add \
    --type sagemaker-endpoint \
    --id "${ENDPOINT_ARN}" \
    --project "${PROJECT_NAME}" \
    --meta "{\"endpointName\":\"${ENDPOINT_NAME}\",\"instanceType\":\"${INSTANCE_TYPE}\",\"region\":\"${AWS_REGION}\"}" \
    2>/dev/null || true
fi
468
+
469
+ # ============================================================
470
+ # Wait for endpoint (skip if already InService)
471
+ # ============================================================
472
+ if [ -z "${SKIP_TO}" ] || [ "${SKIP_TO}" = "wait_endpoint" ]; then
473
+ echo "⏳ Waiting for async endpoint to reach InService status..."
474
+ echo " This may take several minutes..."
475
+ echo " If this times out, re-run ./do/deploy to resume."
476
+
477
+ wait_endpoint "${ENDPOINT_NAME}"
478
+ fi
479
+
480
+ echo "✅ Async deployment complete!"
481
+ echo ""
482
+ echo "📋 Deployment Details:"
483
+ echo " Endpoint: ${ENDPOINT_NAME}"
484
+ echo " Endpoint Config: ${ENDPOINT_CONFIG_NAME}"
485
+ echo " Model: ${MODEL_NAME_SM}"
486
+ echo " Region: ${AWS_REGION}"
487
+ echo " Instance Type: ${INSTANCE_TYPE}"
488
+ echo " Image: ${ECR_REPOSITORY}:${IMAGE_TAG}"
489
+ echo " S3 Output: ${ASYNC_S3_OUTPUT_PATH}"
490
+ echo " SNS Success: ${ASYNC_SNS_SUCCESS_TOPIC}"
491
+ echo " SNS Error: ${ASYNC_SNS_ERROR_TOPIC}"
492
+ echo ""
493
+ echo "📋 What's next?"
494
+ echo " • Test your async endpoint: ./do/test"
495
+ echo " • Check async output: aws s3 ls ${ASYNC_S3_OUTPUT_PATH}"
496
+ <% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
497
+ echo " • Benchmark performance: ./do/benchmark"
498
+ <% } %>
499
+ echo " • Register this deployment: ./do/register"
500
+ echo " • View logs: ./do/logs"
501
+ echo " • Clean up when done: ./do/clean endpoint"