@aws/ml-container-creator 0.7.1 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/LICENSE-THIRD-PARTY +50760 -16218
  2. package/bin/cli.js +1 -1
  3. package/infra/ci-harness/buildspec.yml +4 -0
  4. package/package.json +3 -1
  5. package/servers/lib/catalogs/instances.json +52 -1275
  6. package/servers/lib/catalogs/model-servers.json +80 -0
  7. package/servers/lib/catalogs/models.json +0 -132
  8. package/servers/lib/catalogs/popular-diffusors.json +1 -110
  9. package/servers/model-picker/index.js +27 -16
  10. package/src/app.js +113 -23
  11. package/src/lib/cli-handler.js +1 -1
  12. package/src/lib/config-manager.js +39 -2
  13. package/src/lib/cross-cutting-checker.js +146 -33
  14. package/src/lib/deployment-config-resolver.js +10 -4
  15. package/src/lib/e2e-bootstrap.js +227 -0
  16. package/src/lib/e2e-catalog-validator.js +103 -0
  17. package/src/lib/e2e-quota-validator.js +135 -0
  18. package/src/lib/mcp-client.js +16 -1
  19. package/src/lib/mcp-command-handler.js +10 -2
  20. package/src/lib/prompt-runner.js +306 -24
  21. package/src/lib/prompts.js +9 -3
  22. package/src/lib/template-manager.js +10 -4
  23. package/src/lib/train-config-parser.js +136 -0
  24. package/src/lib/train-config-persistence.js +143 -0
  25. package/src/lib/train-config-validator.js +112 -0
  26. package/src/lib/train-feedback.js +46 -0
  27. package/src/lib/train-idempotency.js +97 -0
  28. package/src/lib/train-request-builder.js +120 -0
  29. package/src/lib/tune-catalog-validator.js +5 -5
  30. package/templates/code/serve +2 -2
  31. package/templates/code/serving.properties +2 -2
  32. package/templates/diffusors/serve +3 -3
  33. package/templates/do/.train_build_request.py +141 -0
  34. package/templates/do/.train_poll_parser.py +135 -0
  35. package/templates/do/.train_status_parser.py +187 -0
  36. package/templates/do/.tune_helper.py +2 -2
  37. package/templates/do/lib/feedback.sh +41 -0
  38. package/templates/do/register +8 -2
  39. package/templates/do/test +5 -5
  40. package/templates/do/train +786 -0
  41. package/templates/do/training/config.yaml +140 -0
  42. package/templates/do/training/train.py +463 -0
  43. package/templates/do/tune +2 -2
  44. package/templates/marketplace/config +118 -0
  45. package/templates/marketplace/deploy +890 -0
  46. package/templates/marketplace/test +453 -0
@@ -0,0 +1,890 @@
1
+ #!/bin/bash
2
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+ # Marketplace model package deployment script.
6
+ # Deploys a pre-built AWS Marketplace model package using CreateModel with ModelPackageName.
7
+ # No build, push, or submit steps — the vendor provides the container and weights.
8
+
9
+ set -e
10
+ set -u
11
+ set -o pipefail
12
+
13
+ # Parse flags
14
+ FORCE_NEW=false
15
+ FORCE_IC=false
16
+ while [ $# -gt 0 ]; do
17
+ case "$1" in
18
+ --force) FORCE_NEW=true; shift ;;
19
+ --force-ic) FORCE_IC=true; shift ;;
20
+ --help|-h)
21
+ echo "Usage: ./do/deploy [--force] [--force-ic]"
22
+ echo ""
23
+ echo "Options:"
24
+ echo " --force Create a new deployment, even if one already exists."
25
+ echo " --force-ic Recreate the endpoint configuration on the existing endpoint."
26
+ echo ""
27
+ echo "Without flags, deploy resumes from the last run."
28
+ exit 0
29
+ ;;
30
+ *)
31
+ echo "❌ Unknown option: $1"
32
+ echo " Run ./do/deploy --help for usage."
33
+ exit 1
34
+ ;;
35
+ esac
36
+ done
37
+
38
+ # Source configuration
39
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
40
+ source "${SCRIPT_DIR}/config"
41
+
42
+ echo "🚀 Deploying Marketplace Model Package"
43
+ echo " Project: ${PROJECT_NAME}"
44
+ echo " Deployment config: marketplace"
45
+ echo " Region: ${AWS_REGION}"
46
+ echo " Model package: ${MODEL_PACKAGE_ARN}"
47
+ echo " Deployment target: ${DEPLOYMENT_TARGET}"
48
+ echo " Instance type: ${INSTANCE_TYPE}"
49
+ <% if (deploymentTarget === 'async-inference') { %>
50
+ echo " S3 output: ${ASYNC_S3_OUTPUT_PATH}"
51
+ echo " SNS success: ${ASYNC_SNS_SUCCESS_TOPIC}"
52
+ echo " SNS error: ${ASYNC_SNS_ERROR_TOPIC}"
53
+ <% if (asyncMaxConcurrentInvocations) { %>
54
+ echo " Max concurrent: ${ASYNC_MAX_CONCURRENT_INVOCATIONS}"
55
+ <% } %>
56
+ <% } else if (deploymentTarget === 'batch-transform') { %>
57
+ echo " Instance count: ${BATCH_INSTANCE_COUNT}"
58
+ echo " S3 input: ${BATCH_INPUT_PATH}"
59
+ echo " S3 output: ${BATCH_OUTPUT_PATH}"
60
+ echo " Split type: ${BATCH_SPLIT_TYPE}"
61
+ echo " Strategy: ${BATCH_STRATEGY}"
62
+ <% } %>
63
+
64
+ # Check AWS credentials: one STS call both validates them and captures the account ID
65
+ echo "🔍 Validating AWS credentials..."
66
+ if ! AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text 2> /dev/null); then
67
+ echo "❌ AWS credentials not configured"
68
+ echo " Run: aws configure"
69
+ echo " Or set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables"
70
+ exit 4
71
+ fi
72
+
73
+ # AWS_ACCOUNT_ID is later used to build the ARNs recorded in the manifest
74
+ echo "✅ AWS credentials validated (Account: ${AWS_ACCOUNT_ID})"
75
+
76
+ # Source shared helpers
77
+ source "${SCRIPT_DIR}/lib/wait.sh"
78
+ source "${SCRIPT_DIR}/lib/endpoint-config.sh"
79
+
80
+ # Validate execution role ARN
81
+ if [ -z "${ROLE_ARN:-}" ]; then
82
+ echo "❌ Execution role ARN not provided"
83
+ echo ""
84
+ echo "Usage:"
85
+ echo " export ROLE_ARN=arn:aws:iam::ACCOUNT_ID:role/YOUR_ROLE"
86
+ echo " ./do/deploy"
87
+ echo ""
88
+ echo "Or set ROLE_ARN in do/config"
89
+ echo ""
90
+ echo "The execution role must have permissions for:"
91
+ echo " • SageMaker model and endpoint management"
92
+ echo " • Access to the Marketplace model package"
93
+ echo " • CloudWatch Logs"
94
+ exit 3
95
+ fi
96
+
97
+ echo " Using execution role: ${ROLE_ARN}"
98
+
99
+ <% if (deploymentTarget === 'realtime-inference') { %>
100
+ # ============================================================
101
+ # SageMaker Real-Time Inference Deployment (Model-Based)
102
+ # Marketplace packages use the classic model-based flow:
103
+ # CreateModel(ModelPackageName) → CreateEndpointConfig → CreateEndpoint
104
+ # ============================================================
105
+
106
+ # ============================================================
107
+ # Idempotency: check for existing deployment from a previous run
108
+ # ============================================================
109
+ SKIP_TO=""
110
+
111
+ if [ "${FORCE_NEW}" = true ]; then
112
+ echo "🔄 --force: ignoring previous deployment, creating new resources."
113
+ elif [ -n "${ENDPOINT_NAME:-}" ]; then
114
+ echo "🔍 Checking for existing deployment: ${ENDPOINT_NAME}"
115
+
116
+ EP_STATUS=$(_get_endpoint_status "${ENDPOINT_NAME}")
117
+
118
+ case "${EP_STATUS}" in
119
+ InService)
120
+ echo "✅ Endpoint already InService: ${ENDPOINT_NAME}"
121
+ echo ""
122
+ echo "📋 Deployment is already live. Nothing to do."
123
+ echo " Endpoint: ${ENDPOINT_NAME}"
124
+ echo ""
125
+ echo "🧪 Test your endpoint:"
126
+ echo " ./do/test"
127
+ echo ""
128
+ echo "🧹 Clean up when done:"
129
+ echo " ./do/clean endpoint"
130
+ exit 0
131
+ ;;
132
+ Creating|Updating)
133
+ echo "⏳ Endpoint still ${EP_STATUS}: ${ENDPOINT_NAME}"
134
+ SKIP_TO="wait_endpoint"
135
+ ;;
136
+ Failed)
137
+ echo "⚠️ Previous endpoint failed: ${ENDPOINT_NAME}"
138
+ echo " Creating a new deployment. Clean up the failed endpoint with:"
139
+ echo " ./do/clean endpoint"
140
+ echo ""
141
+ ;;
142
+ "")
143
+ echo " Previous endpoint not found (may have been cleaned up). Creating new deployment."
144
+ ;;
145
+ *)
146
+ echo " Endpoint in unexpected state: ${EP_STATUS}. Creating new deployment."
147
+ ;;
148
+ esac
149
+ fi
150
+
151
+ # ============================================================
152
+ # Create resources (skip if resuming from wait)
153
+ # ============================================================
154
+ if [ -z "${SKIP_TO}" ]; then
155
+ TIMESTAMP=$(date +%s)
156
+ MODEL_NAME_SM="${PROJECT_NAME}-mkt-model-${TIMESTAMP}"
157
+ ENDPOINT_CONFIG_NAME="${PROJECT_NAME}-mkt-epc-${TIMESTAMP}"
158
+ ENDPOINT_NAME="${PROJECT_NAME}-mkt-ep-${TIMESTAMP}"
159
+
160
+ _update_config_var "ENDPOINT_NAME" "${ENDPOINT_NAME}"
161
+ _update_config_var "ENDPOINT_CONFIG_NAME" "${ENDPOINT_CONFIG_NAME}"
162
+ _update_config_var "SAGEMAKER_MODEL_NAME" "${MODEL_NAME_SM}"
163
+
164
+ # Step 1: Create SageMaker model from Marketplace model package
165
+ echo "📦 Creating SageMaker model from Marketplace package: ${MODEL_NAME_SM}"
166
+ if ! aws sagemaker create-model \
167
+ --model-name "${MODEL_NAME_SM}" \
168
+ --primary-container "{\"ModelPackageName\":\"${MODEL_PACKAGE_ARN}\"}" \
169
+ --execution-role-arn "${ROLE_ARN}" \
170
+ --region "${AWS_REGION}"; then
171
+
172
+ echo "❌ Failed to create model from package ARN. Check IAM permissions and subscription status."
173
+ echo " Check that:"
174
+ echo " • The model package ARN is correct: ${MODEL_PACKAGE_ARN}"
175
+ echo " • Your Marketplace subscription is active"
176
+ echo " • The execution role has permission to access the model package"
177
+ exit 4
178
+ fi
179
+
180
+ echo "✅ SageMaker model created: ${MODEL_NAME_SM}"
181
+
182
+ # Record model in manifest (best-effort: stderr is hidden and a failure never aborts the deploy); SCRIPT_DIR keeps this working from any cwd
183
+ MODEL_ARN="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:model/${MODEL_NAME_SM}"
184
+ "${SCRIPT_DIR}/manifest" add \
185
+ --type sagemaker-model \
186
+ --id "${MODEL_ARN}" \
187
+ --project "${PROJECT_NAME}" \
188
+ --meta "{\"modelName\":\"${MODEL_NAME_SM}\",\"modelPackageArn\":\"${MODEL_PACKAGE_ARN}\",\"region\":\"${AWS_REGION}\"}" \
189
+ 2>/dev/null || true
190
+
191
+ # Step 2: Create endpoint configuration
192
+ # The variant references the model created in Step 1 by name (model-based flow, so no --execution-role-arn on the endpoint config). NOTE(review): MODEL_NAME_SM is already set above; whether endpoint-config.sh also reads it is not visible here — confirm.
193
+ VARIANT_JSON="[{\"VariantName\":\"AllTraffic\",\"ModelName\":\"${MODEL_NAME_SM}\",\"InstanceType\":\"${INSTANCE_TYPE}\",\"InitialInstanceCount\":1}]"
194
+
195
+ echo "⚙️ Creating endpoint configuration: ${ENDPOINT_CONFIG_NAME}"
196
+ if ! aws sagemaker create-endpoint-config \
197
+ --endpoint-config-name "${ENDPOINT_CONFIG_NAME}" \
198
+ --production-variants "${VARIANT_JSON}" \
199
+ --region "${AWS_REGION}"; then
200
+
201
+ echo "❌ Failed to create endpoint configuration"
202
+ echo " Check that:"
203
+ echo " • The instance type is valid: ${INSTANCE_TYPE}"
204
+ echo " • The instance type is available in region: ${AWS_REGION}"
205
+ echo " • You have sufficient service quota for the instance type"
206
+ exit 4
207
+ fi
208
+
209
+ echo "✅ Endpoint configuration created: ${ENDPOINT_CONFIG_NAME}"
210
+
211
+ # Record endpoint config in manifest (best-effort: stderr is hidden and a failure never aborts the deploy); SCRIPT_DIR keeps this working from any cwd
212
+ ENDPOINT_CONFIG_ARN="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:endpoint-config/${ENDPOINT_CONFIG_NAME}"
213
+ "${SCRIPT_DIR}/manifest" add \
214
+ --type sagemaker-endpoint-config \
215
+ --id "${ENDPOINT_CONFIG_ARN}" \
216
+ --project "${PROJECT_NAME}" \
217
+ --meta "{\"endpointConfigName\":\"${ENDPOINT_CONFIG_NAME}\",\"instanceType\":\"${INSTANCE_TYPE}\",\"region\":\"${AWS_REGION}\"}" \
218
+ 2>/dev/null || true
219
+
220
+ # Step 3: Create endpoint
221
+ echo "🚀 Creating endpoint: ${ENDPOINT_NAME}"
222
+ if ! aws sagemaker create-endpoint \
223
+ --endpoint-name "${ENDPOINT_NAME}" \
224
+ --endpoint-config-name "${ENDPOINT_CONFIG_NAME}" \
225
+ --region "${AWS_REGION}"; then
226
+
227
+ echo "❌ Failed to create endpoint"
228
+ echo " Check that:"
229
+ echo " • Your IAM credentials have sagemaker:CreateEndpoint permission"
230
+ echo " • You have sufficient service quota in region: ${AWS_REGION}"
231
+ exit 4
232
+ fi
233
+
234
+ echo "✅ Endpoint creation initiated: ${ENDPOINT_NAME}"
235
+
236
+ # Record endpoint in manifest (best-effort: stderr is hidden and a failure never aborts the deploy); SCRIPT_DIR keeps this working from any cwd
237
+ ENDPOINT_ARN="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:endpoint/${ENDPOINT_NAME}"
238
+ "${SCRIPT_DIR}/manifest" add \
239
+ --type sagemaker-endpoint \
240
+ --id "${ENDPOINT_ARN}" \
241
+ --project "${PROJECT_NAME}" \
242
+ --meta "{\"endpointName\":\"${ENDPOINT_NAME}\",\"instanceType\":\"${INSTANCE_TYPE}\",\"region\":\"${AWS_REGION}\"}" \
243
+ 2>/dev/null || true
244
+ fi
245
+
246
+ # ============================================================
247
+ # Wait for endpoint
248
+ # ============================================================
249
+ if [ -z "${SKIP_TO}" ] || [ "${SKIP_TO}" = "wait_endpoint" ]; then
250
+ echo "⏳ Waiting for endpoint to reach InService status..."
251
+ echo " This may take several minutes..."
252
+ echo " If this times out, re-run ./do/deploy to resume."
253
+
254
+ wait_endpoint "${ENDPOINT_NAME}"
255
+ fi
256
+
257
+ echo "✅ Deployment complete!"
258
+ echo ""
259
+ echo "📋 Deployment Details:" # summary printed after a fresh create and after a resumed wait
260
+ echo " Endpoint: ${ENDPOINT_NAME}"
261
+ echo " Endpoint Config: ${ENDPOINT_CONFIG_NAME:-N/A}" # only assigned on the create path; on resume it must come from do/config, so default it to survive set -u
262
+ echo " Model: ${SAGEMAKER_MODEL_NAME:-${MODEL_NAME_SM:-N/A}}"
263
+ echo " Model Package: ${MODEL_PACKAGE_ARN}"
264
+ echo " Region: ${AWS_REGION}"
265
+ echo " Instance Type: ${INSTANCE_TYPE}"
266
+ echo ""
267
+ echo "📋 What's next?"
268
+ echo " • Test your endpoint: ./do/test"
269
+ <% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
270
+ echo " • Benchmark performance: ./do/benchmark"
271
+ <% } %>
272
+ echo " • View endpoint status: ./do/status"
273
+ echo " • Register this deployment: ./do/register"
274
+ echo " • View logs: ./do/logs"
275
+ echo " • Clean up when done: ./do/clean endpoint"
276
+
277
+ <% } else if (deploymentTarget === 'async-inference') { %>
278
+ # ============================================================
279
+ # SageMaker Async Inference Deployment (Model-Based)
280
+ # Marketplace packages use: CreateModel(ModelPackageName) → CreateEndpointConfig(AsyncInferenceConfig) → CreateEndpoint
281
+ # ============================================================
282
+
283
+ # ============================================================
284
+ # Bootstrap async infrastructure (S3 bucket + SNS topics)
285
+ # ============================================================
286
+
287
+ # Extract bucket name from S3 output path
288
+ ASYNC_S3_BUCKET=$(echo "${ASYNC_S3_OUTPUT_PATH}" | sed 's|s3://||' | cut -d'/' -f1)
289
+
290
+ <% if (!asyncS3OutputPath) { %>
291
+ # Bootstrap default S3 bucket (check-and-create)
292
+ echo "🔍 Checking if S3 bucket exists: ${ASYNC_S3_BUCKET}"
293
+ if ! aws s3api head-bucket --bucket "${ASYNC_S3_BUCKET}" --region "${AWS_REGION}" 2>/dev/null; then
294
+ echo "📦 Creating S3 bucket: ${ASYNC_S3_BUCKET}"
295
+ if [ "${AWS_REGION}" = "us-east-1" ]; then
296
+ if ! aws s3api create-bucket \
297
+ --bucket "${ASYNC_S3_BUCKET}" \
298
+ --region "${AWS_REGION}"; then
299
+ echo "❌ Failed to create S3 bucket: ${ASYNC_S3_BUCKET}"
300
+ exit 4
301
+ fi
302
+ else
303
+ if ! aws s3api create-bucket \
304
+ --bucket "${ASYNC_S3_BUCKET}" \
305
+ --region "${AWS_REGION}" \
306
+ --create-bucket-configuration LocationConstraint="${AWS_REGION}"; then
307
+ echo "❌ Failed to create S3 bucket: ${ASYNC_S3_BUCKET}"
308
+ exit 4
309
+ fi
310
+ fi
311
+ echo "✅ S3 bucket created: ${ASYNC_S3_BUCKET}"
312
+ else
313
+ echo "✅ S3 bucket exists: ${ASYNC_S3_BUCKET}"
314
+ fi
315
+ <% } else { %>
316
+ # Custom S3 output path provided — skip bucket creation
317
+ echo "✅ Using custom S3 output path: ${ASYNC_S3_OUTPUT_PATH}"
318
+ <% } %>
319
+
320
+ # Extract topic name from SNS success topic ARN
321
+ ASYNC_SNS_SUCCESS_TOPIC_NAME=$(echo "${ASYNC_SNS_SUCCESS_TOPIC}" | awk -F: '{print $NF}')
322
+
323
+ <% if (!asyncSnsSuccessTopic) { %>
324
+ # Bootstrap default SNS success topic (check-and-create); the probe is fully silenced so the attributes JSON is not dumped when the topic exists
325
+ echo "🔍 Checking if SNS success topic exists: ${ASYNC_SNS_SUCCESS_TOPIC_NAME}"
326
+ if ! aws sns get-topic-attributes --topic-arn "${ASYNC_SNS_SUCCESS_TOPIC}" --region "${AWS_REGION}" > /dev/null 2>&1; then
327
+ echo "📦 Creating SNS success topic: ${ASYNC_SNS_SUCCESS_TOPIC_NAME}"
328
+ if ! aws sns create-topic \
329
+ --name "${ASYNC_SNS_SUCCESS_TOPIC_NAME}" \
330
+ --region "${AWS_REGION}" > /dev/null; then
331
+ echo "❌ Failed to create SNS success topic"
332
+ exit 4
333
+ fi
334
+ echo "✅ SNS success topic created: ${ASYNC_SNS_SUCCESS_TOPIC_NAME}"
335
+ else
336
+ echo "✅ SNS success topic exists: ${ASYNC_SNS_SUCCESS_TOPIC_NAME}"
337
+ fi
338
+ <% } else { %>
339
+ # Custom SNS success topic ARN provided — skip topic creation
340
+ echo "✅ Using custom SNS success topic: ${ASYNC_SNS_SUCCESS_TOPIC}"
341
+ <% } %>
342
+
343
+ # Extract topic name from SNS error topic ARN
344
+ ASYNC_SNS_ERROR_TOPIC_NAME=$(echo "${ASYNC_SNS_ERROR_TOPIC}" | awk -F: '{print $NF}')
345
+
346
+ <% if (!asyncSnsErrorTopic) { %>
347
+ # Bootstrap default SNS error topic (check-and-create); the probe is fully silenced so the attributes JSON is not dumped when the topic exists
348
+ echo "🔍 Checking if SNS error topic exists: ${ASYNC_SNS_ERROR_TOPIC_NAME}"
349
+ if ! aws sns get-topic-attributes --topic-arn "${ASYNC_SNS_ERROR_TOPIC}" --region "${AWS_REGION}" > /dev/null 2>&1; then
350
+ echo "📦 Creating SNS error topic: ${ASYNC_SNS_ERROR_TOPIC_NAME}"
351
+ if ! aws sns create-topic \
352
+ --name "${ASYNC_SNS_ERROR_TOPIC_NAME}" \
353
+ --region "${AWS_REGION}" > /dev/null; then
354
+ echo "❌ Failed to create SNS error topic"
355
+ exit 4
356
+ fi
357
+ echo "✅ SNS error topic created: ${ASYNC_SNS_ERROR_TOPIC_NAME}"
358
+ else
359
+ echo "✅ SNS error topic exists: ${ASYNC_SNS_ERROR_TOPIC_NAME}"
360
+ fi
361
+ <% } else { %>
362
+ # Custom SNS error topic ARN provided — skip topic creation
363
+ echo "✅ Using custom SNS error topic: ${ASYNC_SNS_ERROR_TOPIC}"
364
+ <% } %>
365
+
366
+ # ============================================================
367
+ # Idempotency: check for existing deployment from a previous run
368
+ # ============================================================
369
+ SKIP_TO=""
370
+
371
+ if [ "${FORCE_NEW}" = true ]; then
372
+ echo "🔄 --force: ignoring previous deployment, creating new resources."
373
+ elif [ -n "${ENDPOINT_NAME:-}" ]; then
374
+ echo "🔍 Checking for existing deployment: ${ENDPOINT_NAME}"
375
+
376
+ EP_STATUS=$(_get_endpoint_status "${ENDPOINT_NAME}")
377
+
378
+ case "${EP_STATUS}" in
379
+ InService)
380
+ echo "✅ Async endpoint already InService: ${ENDPOINT_NAME}"
381
+ echo ""
382
+ echo "📋 Deployment is already live. Nothing to do."
383
+ echo " Endpoint: ${ENDPOINT_NAME}"
384
+ echo ""
385
+ echo "🧪 Test your async endpoint:"
386
+ echo " ./do/test"
387
+ echo ""
388
+ echo "🧹 Clean up when done:"
389
+ echo " ./do/clean endpoint"
390
+ exit 0
391
+ ;;
392
+ Creating|Updating)
393
+ echo "⏳ Endpoint still ${EP_STATUS}: ${ENDPOINT_NAME}"
394
+ SKIP_TO="wait_endpoint"
395
+ ;;
396
+ Failed)
397
+ echo "⚠️ Previous endpoint failed: ${ENDPOINT_NAME}"
398
+ echo " Creating a new deployment. Clean up the failed endpoint with:"
399
+ echo " ./do/clean endpoint"
400
+ echo ""
401
+ ;;
402
+ "")
403
+ echo " Previous endpoint not found (may have been cleaned up). Creating new deployment."
404
+ ;;
405
+ *)
406
+ echo " Endpoint in unexpected state: ${EP_STATUS}. Creating new deployment."
407
+ ;;
408
+ esac
409
+ fi
410
+
411
+ # ============================================================
412
+ # Create async resources (skip if resuming from wait)
413
+ # ============================================================
414
+ if [ -z "${SKIP_TO}" ]; then
415
+ TIMESTAMP=$(date +%s)
416
+ MODEL_NAME_SM="${PROJECT_NAME}-mkt-async-model-${TIMESTAMP}"
417
+ ENDPOINT_CONFIG_NAME="${PROJECT_NAME}-mkt-async-epc-${TIMESTAMP}"
418
+ ENDPOINT_NAME="${PROJECT_NAME}-mkt-async-ep-${TIMESTAMP}"
419
+
420
+ _update_config_var "ENDPOINT_NAME" "${ENDPOINT_NAME}"
421
+ _update_config_var "ENDPOINT_CONFIG_NAME" "${ENDPOINT_CONFIG_NAME}"
422
+ _update_config_var "SAGEMAKER_MODEL_NAME" "${MODEL_NAME_SM}"
423
+
424
+ # Step 1: Create SageMaker model from Marketplace model package
425
+ echo "📦 Creating SageMaker model from Marketplace package: ${MODEL_NAME_SM}"
426
+ if ! aws sagemaker create-model \
427
+ --model-name "${MODEL_NAME_SM}" \
428
+ --primary-container "{\"ModelPackageName\":\"${MODEL_PACKAGE_ARN}\"}" \
429
+ --execution-role-arn "${ROLE_ARN}" \
430
+ --region "${AWS_REGION}"; then
431
+
432
+ echo "❌ Failed to create model from package ARN. Check IAM permissions and subscription status."
433
+ echo " Check that:"
434
+ echo " • The model package ARN is correct: ${MODEL_PACKAGE_ARN}"
435
+ echo " • Your Marketplace subscription is active"
436
+ echo " • The execution role has permission to access the model package"
437
+ exit 4
438
+ fi
439
+
440
+ echo "✅ SageMaker model created: ${MODEL_NAME_SM}"
441
+
442
+ # Record model in manifest (best-effort: stderr is hidden and a failure never aborts the deploy); SCRIPT_DIR keeps this working from any cwd
443
+ MODEL_ARN="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:model/${MODEL_NAME_SM}"
444
+ "${SCRIPT_DIR}/manifest" add \
445
+ --type sagemaker-model \
446
+ --id "${MODEL_ARN}" \
447
+ --project "${PROJECT_NAME}" \
448
+ --meta "{\"modelName\":\"${MODEL_NAME_SM}\",\"modelPackageArn\":\"${MODEL_PACKAGE_ARN}\",\"region\":\"${AWS_REGION}\"}" \
449
+ 2>/dev/null || true
450
+
451
+ # Step 2: Build production variant and AsyncInferenceConfig
452
+ VARIANT_JSON="[{\"VariantName\":\"AllTraffic\",\"ModelName\":\"${MODEL_NAME_SM}\",\"InstanceType\":\"${INSTANCE_TYPE}\",\"InitialInstanceCount\":1}]"
453
+
454
+ ASYNC_CONFIG="{\"OutputConfig\":{\"S3OutputPath\":\"${ASYNC_S3_OUTPUT_PATH}\",\"NotificationConfig\":{\"SuccessTopic\":\"${ASYNC_SNS_SUCCESS_TOPIC}\",\"ErrorTopic\":\"${ASYNC_SNS_ERROR_TOPIC}\"}}"
455
+ if [ -n "${ASYNC_MAX_CONCURRENT_INVOCATIONS:-}" ]; then
456
+ ASYNC_CONFIG="${ASYNC_CONFIG},\"ClientConfig\":{\"MaxConcurrentInvocationsPerInstance\":${ASYNC_MAX_CONCURRENT_INVOCATIONS}}"
457
+ fi
458
+ ASYNC_CONFIG="${ASYNC_CONFIG}}"
459
+
460
+ # Step 3: Create endpoint configuration with AsyncInferenceConfig
461
+ echo "⚙️ Creating async endpoint configuration: ${ENDPOINT_CONFIG_NAME}"
462
+ if ! aws sagemaker create-endpoint-config \
463
+ --endpoint-config-name "${ENDPOINT_CONFIG_NAME}" \
464
+ --production-variants "${VARIANT_JSON}" \
465
+ --async-inference-config "${ASYNC_CONFIG}" \
466
+ --region "${AWS_REGION}"; then
467
+
468
+ echo "❌ Failed to create async endpoint configuration"
469
+ echo " Check that:"
470
+ echo " • The S3 output path is accessible: ${ASYNC_S3_OUTPUT_PATH}"
471
+ echo " • The IAM role has s3:PutObject permission on the output path"
472
+ echo " • The instance type is valid: ${INSTANCE_TYPE}"
473
+ echo " • You have sufficient service quota for the instance type"
474
+ exit 4
475
+ fi
476
+
477
+ echo "✅ Async endpoint configuration created: ${ENDPOINT_CONFIG_NAME}"
478
+
479
+ # Record endpoint config in manifest (best-effort: stderr is hidden and a failure never aborts the deploy); SCRIPT_DIR keeps this working from any cwd
480
+ ENDPOINT_CONFIG_ARN="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:endpoint-config/${ENDPOINT_CONFIG_NAME}"
481
+ "${SCRIPT_DIR}/manifest" add \
482
+ --type sagemaker-endpoint-config \
483
+ --id "${ENDPOINT_CONFIG_ARN}" \
484
+ --project "${PROJECT_NAME}" \
485
+ --meta "{\"endpointConfigName\":\"${ENDPOINT_CONFIG_NAME}\",\"instanceType\":\"${INSTANCE_TYPE}\",\"region\":\"${AWS_REGION}\"}" \
486
+ 2>/dev/null || true
487
+
488
+ # Step 4: Create endpoint
489
+ echo "🚀 Creating async endpoint: ${ENDPOINT_NAME}"
490
+ if ! aws sagemaker create-endpoint \
491
+ --endpoint-name "${ENDPOINT_NAME}" \
492
+ --endpoint-config-name "${ENDPOINT_CONFIG_NAME}" \
493
+ --region "${AWS_REGION}"; then
494
+
495
+ echo "❌ Failed to create async endpoint"
496
+ echo " Check that:"
497
+ echo " • Your IAM credentials have sagemaker:CreateEndpoint permission"
498
+ echo " • You have sufficient service quota in region: ${AWS_REGION}"
499
+ exit 4
500
+ fi
501
+
502
+ echo "✅ Async endpoint creation initiated: ${ENDPOINT_NAME}"
503
+
504
+ # Record endpoint in manifest (best-effort: stderr is hidden and a failure never aborts the deploy); SCRIPT_DIR keeps this working from any cwd
505
+ ENDPOINT_ARN="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:endpoint/${ENDPOINT_NAME}"
506
+ "${SCRIPT_DIR}/manifest" add \
507
+ --type sagemaker-endpoint \
508
+ --id "${ENDPOINT_ARN}" \
509
+ --project "${PROJECT_NAME}" \
510
+ --meta "{\"endpointName\":\"${ENDPOINT_NAME}\",\"instanceType\":\"${INSTANCE_TYPE}\",\"region\":\"${AWS_REGION}\"}" \
511
+ 2>/dev/null || true
512
+ fi
513
+
514
+ # ============================================================
515
+ # Wait for endpoint
516
+ # ============================================================
517
+ if [ -z "${SKIP_TO}" ] || [ "${SKIP_TO}" = "wait_endpoint" ]; then
518
+ echo "⏳ Waiting for async endpoint to reach InService status..."
519
+ echo " This may take several minutes..."
520
+ echo " If this times out, re-run ./do/deploy to resume."
521
+
522
+ wait_endpoint "${ENDPOINT_NAME}"
523
+ fi
524
+
525
+ echo "✅ Async deployment complete!"
526
+ echo ""
527
+ echo "📋 Deployment Details:" # summary printed after a fresh create and after a resumed wait
528
+ echo " Endpoint: ${ENDPOINT_NAME}"
529
+ echo " Endpoint Config: ${ENDPOINT_CONFIG_NAME:-N/A}" # only assigned on the create path; on resume it must come from do/config, so default it to survive set -u
530
+ echo " Model: ${SAGEMAKER_MODEL_NAME:-${MODEL_NAME_SM:-N/A}}"
531
+ echo " Model Package: ${MODEL_PACKAGE_ARN}"
532
+ echo " Region: ${AWS_REGION}"
533
+ echo " Instance Type: ${INSTANCE_TYPE}"
534
+ echo " S3 Output: ${ASYNC_S3_OUTPUT_PATH}"
535
+ echo " SNS Success: ${ASYNC_SNS_SUCCESS_TOPIC}"
536
+ echo " SNS Error: ${ASYNC_SNS_ERROR_TOPIC}"
537
+ echo ""
538
+ echo "📋 What's next?"
539
+ echo " • Test your async endpoint: ./do/test"
540
+ echo " • Check async output: aws s3 ls ${ASYNC_S3_OUTPUT_PATH}"
541
+ <% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
542
+ echo " • Benchmark performance: ./do/benchmark"
543
+ <% } %>
544
+ echo " • Register this deployment: ./do/register"
545
+ echo " • View logs: ./do/logs"
546
+ echo " • Clean up when done: ./do/clean endpoint"
547
+
548
+ <% } else if (deploymentTarget === 'batch-transform') { %>
549
+ # ============================================================
550
+ # SageMaker Batch Transform Deployment
551
+ # Marketplace packages use: CreateModel(ModelPackageName) → CreateTransformJob
552
+ # ============================================================
553
+
554
+ # Validate S3 input path
555
+ if [ -z "${BATCH_INPUT_PATH:-}" ]; then
556
+ echo "❌ S3 input path not provided"
557
+ echo ""
558
+ echo "Set BATCH_INPUT_PATH in do/config or provide via CLI:"
559
+ echo " export BATCH_INPUT_PATH=s3://my-bucket/input/"
560
+ echo " ./do/deploy"
561
+ exit 3
562
+ fi
563
+
564
+ if [[ "${BATCH_INPUT_PATH}" != s3://* ]]; then
565
+ echo "❌ S3 input path must start with s3://"
566
+ echo " Current value: ${BATCH_INPUT_PATH}"
567
+ exit 3
568
+ fi
569
+
570
+ # Validate S3 output path
571
+ if [ -z "${BATCH_OUTPUT_PATH:-}" ]; then
572
+ echo "❌ S3 output path not provided"
573
+ echo ""
574
+ echo "Set BATCH_OUTPUT_PATH in do/config or provide via CLI:"
575
+ echo " export BATCH_OUTPUT_PATH=s3://my-bucket/output/"
576
+ echo " ./do/deploy"
577
+ exit 3
578
+ fi
579
+
580
+ if [[ "${BATCH_OUTPUT_PATH}" != s3://* ]]; then
581
+ echo "❌ S3 output path must start with s3://"
582
+ echo " Current value: ${BATCH_OUTPUT_PATH}"
583
+ exit 3
584
+ fi
585
+
586
+ # ============================================================
587
+ # Bootstrap S3 buckets for batch transform
588
+ # ============================================================
589
+
590
+ BATCH_INPUT_BUCKET=$(echo "${BATCH_INPUT_PATH}" | sed 's|s3://||' | cut -d'/' -f1)
591
+ BATCH_OUTPUT_BUCKET=$(echo "${BATCH_OUTPUT_PATH}" | sed 's|s3://||' | cut -d'/' -f1)
592
+
593
+ <% if (!batchInputPath) { %>
594
+ # Bootstrap default S3 input bucket (check-and-create)
595
+ echo "🔍 Checking if S3 input bucket exists: ${BATCH_INPUT_BUCKET}"
596
+ if ! aws s3api head-bucket --bucket "${BATCH_INPUT_BUCKET}" --region "${AWS_REGION}" 2>/dev/null; then
597
+ echo "📦 Creating S3 input bucket: ${BATCH_INPUT_BUCKET}"
598
+ if [ "${AWS_REGION}" = "us-east-1" ]; then
599
+ if ! aws s3api create-bucket \
600
+ --bucket "${BATCH_INPUT_BUCKET}" \
601
+ --region "${AWS_REGION}"; then
602
+ echo "❌ Failed to create S3 input bucket: ${BATCH_INPUT_BUCKET}"
603
+ exit 4
604
+ fi
605
+ else
606
+ if ! aws s3api create-bucket \
607
+ --bucket "${BATCH_INPUT_BUCKET}" \
608
+ --region "${AWS_REGION}" \
609
+ --create-bucket-configuration LocationConstraint="${AWS_REGION}"; then
610
+ echo "❌ Failed to create S3 input bucket: ${BATCH_INPUT_BUCKET}"
611
+ exit 4
612
+ fi
613
+ fi
614
+ echo "✅ S3 input bucket created: ${BATCH_INPUT_BUCKET}"
615
+ else
616
+ echo "✅ S3 input bucket exists: ${BATCH_INPUT_BUCKET}"
617
+ fi
618
+
619
+ # Upload sample input file if the input prefix is empty; "${BATCH_INPUT_PATH%/}/" normalizes the prefix so a path without a trailing slash still yields <prefix>/sample.jsonl
620
+ EXISTING_OBJECTS=$(aws s3 ls "${BATCH_INPUT_PATH%/}/" --region "${AWS_REGION}" 2>/dev/null | head -1 || true)
621
+ if [ -z "${EXISTING_OBJECTS}" ]; then
622
+ echo "📄 Uploading sample input file to ${BATCH_INPUT_PATH}"
623
+ echo '{"inputs": "What is machine learning?", "parameters": {"max_new_tokens": 50}}' | aws s3 cp - "${BATCH_INPUT_PATH%/}/sample.jsonl" --region "${AWS_REGION}"
624
+ echo "✅ Sample input uploaded: ${BATCH_INPUT_PATH%/}/sample.jsonl"
625
+ echo " ⚠️ Replace this with your actual input data before running production jobs"
626
+ fi
627
+ <% } else { %>
628
+ # Custom S3 input path provided — skip bucket creation
629
+ echo "✅ Using custom S3 input path: ${BATCH_INPUT_PATH}"
630
+ <% } %>
631
+
632
+ <% if (!batchOutputPath) { %>
633
+ # Bootstrap default S3 output bucket (check-and-create, may be same as input)
634
+ if [ "${BATCH_OUTPUT_BUCKET}" != "${BATCH_INPUT_BUCKET}" ]; then
635
+ echo "🔍 Checking if S3 output bucket exists: ${BATCH_OUTPUT_BUCKET}"
636
+ if ! aws s3api head-bucket --bucket "${BATCH_OUTPUT_BUCKET}" --region "${AWS_REGION}" 2>/dev/null; then
637
+ echo "📦 Creating S3 output bucket: ${BATCH_OUTPUT_BUCKET}"
638
+ if [ "${AWS_REGION}" = "us-east-1" ]; then
639
+ if ! aws s3api create-bucket \
640
+ --bucket "${BATCH_OUTPUT_BUCKET}" \
641
+ --region "${AWS_REGION}"; then
642
+ echo "❌ Failed to create S3 output bucket: ${BATCH_OUTPUT_BUCKET}"
643
+ exit 4
644
+ fi
645
+ else
646
+ if ! aws s3api create-bucket \
647
+ --bucket "${BATCH_OUTPUT_BUCKET}" \
648
+ --region "${AWS_REGION}" \
649
+ --create-bucket-configuration LocationConstraint="${AWS_REGION}"; then
650
+ echo "❌ Failed to create S3 output bucket: ${BATCH_OUTPUT_BUCKET}"
651
+ exit 4
652
+ fi
653
+ fi
654
+ echo "✅ S3 output bucket created: ${BATCH_OUTPUT_BUCKET}"
655
+ else
656
+ echo "✅ S3 output bucket exists: ${BATCH_OUTPUT_BUCKET}"
657
+ fi
658
+ else
659
+ echo "✅ S3 output bucket same as input: ${BATCH_OUTPUT_BUCKET}"
660
+ fi
661
+ <% } else { %>
662
+ # Custom S3 output path provided — skip bucket creation
663
+ echo "✅ Using custom S3 output path: ${BATCH_OUTPUT_PATH}"
664
+ <% } %>
665
+
666
+ # ============================================================
667
+ # Check for previous transform job still running
668
+ # ============================================================
669
+ if [ "${FORCE_NEW}" != true ] && [ -n "${TRANSFORM_JOB_NAME:-}" ]; then
670
+ echo "🔍 Checking previous transform job: ${TRANSFORM_JOB_NAME}"
671
+ PREV_JOB_STATUS=$(aws sagemaker describe-transform-job \
672
+ --transform-job-name "${TRANSFORM_JOB_NAME}" \
673
+ --region "${AWS_REGION}" \
674
+ --query "TransformJobStatus" \
675
+ --output text 2>/dev/null || echo "")
676
+
677
+ case "${PREV_JOB_STATUS}" in
678
+ InProgress)
679
+ echo "⚠️ Previous transform job is still running: ${TRANSFORM_JOB_NAME}"
680
+ echo " Wait for it to complete, or stop it with:"
681
+ echo " aws sagemaker stop-transform-job --transform-job-name ${TRANSFORM_JOB_NAME} --region ${AWS_REGION}"
682
+ echo ""
683
+ echo " Use --force to create a new job anyway."
684
+ exit 4
685
+ ;;
686
+ Completed)
687
+ echo "✅ Previous transform job completed: ${TRANSFORM_JOB_NAME}"
688
+ echo " Creating a new job. Results from the previous job are in:"
689
+ echo " ${BATCH_OUTPUT_PATH}"
690
+ echo ""
691
+ ;;
692
+ *)
693
+ # Failed, Stopped, or not found — proceed with new job
694
+ ;;
695
+ esac
696
+ fi
697
+
# Per-run resource names: the project name plus an epoch-seconds suffix, so
# the model and the transform job created by one run share the same stamp.
TIMESTAMP="$(date +%s)"
TRANSFORM_JOB_NAME="${PROJECT_NAME}-mkt-batch-job-${TIMESTAMP}"
MODEL_NAME_SM="${PROJECT_NAME}-mkt-batch-model-${TIMESTAMP}"

# Record both names through _update_config_var (defined elsewhere in this
# script; presumably it persists them to the project config — confirm).
_update_config_var "TRANSFORM_JOB_NAME" "${TRANSFORM_JOB_NAME}"
_update_config_var "SAGEMAKER_MODEL_NAME" "${MODEL_NAME_SM}"
705
+
# Step 1: Create SageMaker model from Marketplace model package
# The primary container references the model package ARN directly. Any
# failure of the CLI call prints troubleshooting hints and exits with 4.
echo "📦 Creating SageMaker model from Marketplace package: ${MODEL_NAME_SM}"
aws sagemaker create-model \
  --model-name "${MODEL_NAME_SM}" \
  --primary-container "{\"ModelPackageName\":\"${MODEL_PACKAGE_ARN}\"}" \
  --execution-role-arn "${ROLE_ARN}" \
  --region "${AWS_REGION}" || {
  echo "❌ Failed to create model from package ARN. Check IAM permissions and subscription status."
  echo " Check that:"
  echo " • The model package ARN is correct: ${MODEL_PACKAGE_ARN}"
  echo " • Your Marketplace subscription is active"
  echo " • The execution role has permission to access the model package"
  exit 4
}

echo "✅ SageMaker model created: ${MODEL_NAME_SM}"

# Record model in manifest (non-blocking): stderr is discarded and a failing
# manifest call is deliberately ignored so it cannot abort the deployment.
MODEL_ARN="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:model/${MODEL_NAME_SM}"
model_manifest_meta="{\"modelName\":\"${MODEL_NAME_SM}\",\"modelPackageArn\":\"${MODEL_PACKAGE_ARN}\",\"region\":\"${AWS_REGION}\"}"
./do/manifest add \
  --type sagemaker-model \
  --id "${MODEL_ARN}" \
  --project "${PROJECT_NAME}" \
  --meta "${model_manifest_meta}" \
  2>/dev/null || true
732
+
# Step 2: Build transform job JSON
#
# _build_transform_job_json prints the request document passed to
# `aws sagemaker create-transform-job --cli-input-json` on stdout.
#
# Reads:   TRANSFORM_JOB_NAME, MODEL_NAME_SM, BATCH_INPUT_PATH,
#          BATCH_OUTPUT_PATH, BATCH_SPLIT_TYPE, BATCH_STRATEGY, INSTANCE_TYPE,
#          BATCH_INSTANCE_COUNT, BATCH_JOIN_SOURCE (default "None"),
#          BATCH_MAX_CONCURRENT_TRANSFORMS (default 1),
#          BATCH_MAX_PAYLOAD_IN_MB (default 6)
# Outputs: the JSON document on stdout
# Returns: 0
#
# The optional fragments are chosen with an explicit `if`. Embedding
# `$([ ... ] && echo ...)` in the assignment makes the assignment return 1
# whenever BATCH_JOIN_SOURCE is not "Input" (the status of the last command
# substitution), which terminates the script when errexit is enabled.
_build_transform_job_json() {
  local output_extras=""
  local data_processing=""

  if [ "${BATCH_JOIN_SOURCE:-None}" = "Input" ]; then
    # Joining output to input also sets Accept and AssembleWith on the output.
    # NOTE(review): the CreateTransformJob API lists fewer accepted values for
    # AssembleWith than for SplitType; confirm BATCH_SPLIT_TYPE is limited to
    # values valid for both when joining.
    output_extras=", \"Accept\": \"application/json\", \"AssembleWith\": \"${BATCH_SPLIT_TYPE}\""
    data_processing=", \"DataProcessing\": { \"JoinSource\": \"Input\" }"
  fi

  printf '%s\n' "{
  \"TransformJobName\": \"${TRANSFORM_JOB_NAME}\",
  \"ModelName\": \"${MODEL_NAME_SM}\",
  \"TransformInput\": {
    \"DataSource\": {
      \"S3DataSource\": {
        \"S3DataType\": \"S3Prefix\",
        \"S3Uri\": \"${BATCH_INPUT_PATH}\"
      }
    },
    \"ContentType\": \"application/json\",
    \"SplitType\": \"${BATCH_SPLIT_TYPE}\"
  },
  \"TransformOutput\": {
    \"S3OutputPath\": \"${BATCH_OUTPUT_PATH}\"${output_extras}
  },
  \"TransformResources\": {
    \"InstanceType\": \"${INSTANCE_TYPE}\",
    \"InstanceCount\": ${BATCH_INSTANCE_COUNT}
  },
  \"MaxConcurrentTransforms\": ${BATCH_MAX_CONCURRENT_TRANSFORMS:-1},
  \"MaxPayloadInMB\": ${BATCH_MAX_PAYLOAD_IN_MB:-6},
  \"BatchStrategy\": \"${BATCH_STRATEGY}\"${data_processing}
}"
}

TRANSFORM_JOB_JSON="$(_build_transform_job_json)"
760
+
# Step 3: Create transform job
# Submits the request document held in TRANSFORM_JOB_JSON. A failed
# submission prints a checklist of likely causes and exits with 4.
echo "🚀 Creating transform job: ${TRANSFORM_JOB_NAME}"
aws sagemaker create-transform-job \
  --cli-input-json "${TRANSFORM_JOB_JSON}" \
  --region "${AWS_REGION}" || {
  echo "❌ Failed to create transform job"
  echo " Check that:"
  echo " • The S3 input path exists and is accessible: ${BATCH_INPUT_PATH}"
  echo " • The S3 output path is writable: ${BATCH_OUTPUT_PATH}"
  echo " • The instance type is valid: ${INSTANCE_TYPE}"
  echo " • You have sufficient service quota for the instance type"
  exit 4
}

echo "✅ Transform job created: ${TRANSFORM_JOB_NAME}"

# Record transform job in manifest (non-blocking): stderr is discarded and a
# failing manifest call is deliberately ignored.
TRANSFORM_JOB_ARN="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:transform-job/${TRANSFORM_JOB_NAME}"
job_manifest_meta="{\"transformJobName\":\"${TRANSFORM_JOB_NAME}\",\"modelName\":\"${MODEL_NAME_SM}\",\"instanceType\":\"${INSTANCE_TYPE}\",\"region\":\"${AWS_REGION}\"}"
./do/manifest add \
  --type sagemaker-transform-job \
  --id "${TRANSFORM_JOB_ARN}" \
  --project "${PROJECT_NAME}" \
  --meta "${job_manifest_meta}" \
  2>/dev/null || true
786
+
# Step 4: Poll transform job status until completion or failure
# The loop leaves only via `break` on Completed or `exit 4` on Failed,
# Stopped, or a describe call that itself fails; every other status sleeps
# 30 seconds and polls again.
echo "⏳ Waiting for transform job to complete..."
echo " This may take several minutes depending on dataset size..."
echo " If this times out, check status with:"
echo " aws sagemaker describe-transform-job --transform-job-name ${TRANSFORM_JOB_NAME} --region ${AWS_REGION}"
echo ""

while :; do
  # stderr is merged into the capture, so on failure JOB_STATUS carries the
  # CLI's error text rather than a status value.
  if ! JOB_STATUS=$(aws sagemaker describe-transform-job \
    --transform-job-name "${TRANSFORM_JOB_NAME}" \
    --region "${AWS_REGION}" \
    --query "TransformJobStatus" \
    --output text 2>&1); then
    # An error mentioning an expired credential or token gets a softer
    # message: the job itself keeps running on the service side.
    if printf '%s\n' "${JOB_STATUS}" | grep -qi "expired\|token"; then
      echo ""
      echo "⚠️ Credentials expired, but the transform job is still running."
      echo " Refresh your credentials and check status with:"
      echo " aws sagemaker describe-transform-job --transform-job-name ${TRANSFORM_JOB_NAME} --region ${AWS_REGION} --query TransformJobStatus"
      exit 4
    fi
    echo "❌ Failed to describe transform job: ${TRANSFORM_JOB_NAME}"
    echo " Error: ${JOB_STATUS}"
    exit 4
  fi

  case "${JOB_STATUS}" in
    Completed)
      echo "✅ Transform job completed successfully!"
      break
      ;;
    Failed)
      # A second describe call fetches the reason; fall back to "Unknown".
      FAILURE_REASON=$(aws sagemaker describe-transform-job \
        --transform-job-name "${TRANSFORM_JOB_NAME}" \
        --region "${AWS_REGION}" \
        --query "FailureReason" \
        --output text 2>/dev/null || echo "Unknown")
      echo "❌ Transform job failed"
      echo " Reason: ${FAILURE_REASON}"
      echo ""
      echo " Check CloudWatch Logs for details:"
      echo " https://console.aws.amazon.com/cloudwatch/home?region=${AWS_REGION}#logsV2:log-groups/log-group//aws/sagemaker/TransformJobs"
      exit 4
      ;;
    Stopped)
      echo "⚠️ Transform job was stopped"
      exit 4
      ;;
    *)
      # InProgress and any other status: report it and poll again.
      echo " $(date +%H:%M:%S) Job status: ${JOB_STATUS}..."
      sleep 30
      ;;
  esac
done
844
+
# Summary of the finished run: one output line per printf argument, framed
# by a blank line before and after.
printf '%s\n' \
  "" \
  "📋 Deployment Details:" \
  " Transform Job: ${TRANSFORM_JOB_NAME}" \
  " Model: ${MODEL_NAME_SM}" \
  " Model Package: ${MODEL_PACKAGE_ARN}" \
  " Region: ${AWS_REGION}" \
  " Instance Type: ${INSTANCE_TYPE}" \
  " Instance Count: ${BATCH_INSTANCE_COUNT}" \
  " S3 Input: ${BATCH_INPUT_PATH}" \
  " S3 Output: ${BATCH_OUTPUT_PATH}" \
  " Split Type: ${BATCH_SPLIT_TYPE}" \
  " Strategy: ${BATCH_STRATEGY}" \
  ""
858
+
# Download results locally
#
# Syncs BATCH_OUTPUT_PATH into <SCRIPT_DIR>/../batch-output, reports how many
# files are present there, and previews the first one. A failed sync only
# prints a warning; it does not abort the script.

# Print the number of regular files under directory $1, at any depth.
# Counting files (not top-level entries) keeps the "file(s)" figure honest
# when the download contains sub-directories.
_count_batch_output_files() {
  find "$1" -type f 2>/dev/null | wc -l | tr -d ' '
}

# Print a short preview of the first regular file under directory $1: a
# header with its path relative to $1, its first five lines, and the total
# line count when there are more than five. Prints nothing for an empty tree.
# Always returns 0 unless head itself fails.
_preview_batch_output() {
  local dir="$1"
  local first_file
  local line_count

  # Pick the first path in byte order; directories are never selected, so
  # head/wc below always receive a regular file.
  first_file=$(find "${dir}" -type f 2>/dev/null | LC_ALL=C sort | sed -n '1p')
  [ -n "${first_file}" ] || return 0

  echo "📄 Sample output (${first_file#"${dir}"/}):"
  head -5 "${first_file}"

  # Deliberately not named LINES: bash maintains a variable of that name.
  line_count=$(wc -l < "${first_file}" | tr -d ' ')
  if [ "${line_count:-0}" -gt 5 ]; then
    echo " ... (${line_count} total lines)"
  fi
}

LOCAL_OUTPUT_DIR="${SCRIPT_DIR}/../batch-output"
mkdir -p "${LOCAL_OUTPUT_DIR}"
echo "📥 Downloading results to ${LOCAL_OUTPUT_DIR}/"
if aws s3 sync "${BATCH_OUTPUT_PATH}" "${LOCAL_OUTPUT_DIR}/" --region "${AWS_REGION}"; then
  DOWNLOADED=$(_count_batch_output_files "${LOCAL_OUTPUT_DIR}")
  echo "✅ Downloaded ${DOWNLOADED} file(s) to ${LOCAL_OUTPUT_DIR}/"
  echo ""

  # Display first output file preview
  _preview_batch_output "${LOCAL_OUTPUT_DIR}"
else
  echo "⚠️ Could not download output files"
fi
881
+
# Closing hints for the user. The "View results" hint lists the download
# directory with ls; cat cannot read a directory, so the earlier
# `cat batch-output/` suggestion always failed when followed literally.
echo ""
echo "📋 What's next?"
echo " • View results: ls batch-output/"
echo " • Review results: ./do/test"
echo " • Register this deployment: ./do/register"
echo " • View logs: ./do/logs"
echo " • Clean up when done: ./do/clean"
889
+
890
+ <% } %>