@aws/ml-container-creator 0.6.1 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +1 -1
- package/infra/ci-harness/buildspec.yml +4 -0
- package/package.json +1 -1
- package/servers/lib/catalogs/model-servers.json +80 -0
- package/servers/model-picker/index.js +27 -16
- package/src/app.js +89 -21
- package/src/lib/cli-handler.js +1 -1
- package/src/lib/config-manager.js +39 -2
- package/src/lib/cross-cutting-checker.js +146 -33
- package/src/lib/deployment-config-resolver.js +10 -4
- package/src/lib/e2e-bootstrap.js +227 -0
- package/src/lib/e2e-catalog-validator.js +103 -0
- package/src/lib/e2e-quota-validator.js +135 -0
- package/src/lib/prompt-runner.js +290 -22
- package/src/lib/prompts.js +9 -3
- package/src/lib/template-manager.js +10 -4
- package/src/lib/tune-catalog-validator.js +5 -5
- package/templates/Dockerfile +2 -0
- package/templates/code/cw_log_forwarder.py +64 -0
- package/templates/code/serve +14 -3
- package/templates/code/serving.properties +2 -2
- package/templates/deploy_notebook_generator.py +897 -0
- package/templates/diffusors/serve +3 -3
- package/templates/do/.tune_helper.py +2 -2
- package/templates/do/export +19 -2
- package/templates/do/lib/endpoint-config.sh +3 -1
- package/templates/do/lib/inference-component.sh +5 -1
- package/templates/do/register +8 -2
- package/templates/do/test +5 -5
- package/templates/do/tune +2 -2
- package/templates/marketplace/config +118 -0
- package/templates/marketplace/deploy +890 -0
- package/templates/marketplace/test +453 -0
|
@@ -0,0 +1,890 @@
|
|
|
1
|
+
#!/bin/bash
|
|
2
|
+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
#
|
|
5
|
+
# Marketplace model package deployment script.
|
|
6
|
+
# Deploys a pre-built AWS Marketplace model package using CreateModel with ModelPackageName.
|
|
7
|
+
# No build, push, or submit steps — the vendor provides the container and weights.
|
|
8
|
+
|
|
9
|
+
set -e
|
|
10
|
+
set -u
|
|
11
|
+
set -o pipefail
|
|
12
|
+
|
|
13
|
+
# Parse command-line flags.
#   --force      sets FORCE_NEW=true
#   --force-ic   sets FORCE_IC=true
#   --help, -h   prints usage and exits 0
# Any other argument is rejected with exit status 1.
FORCE_NEW=false
FORCE_IC=false
while [ $# -gt 0 ]; do
    case "$1" in
        --force)
            FORCE_NEW=true
            ;;
        --force-ic)
            FORCE_IC=true
            ;;
        --help|-h)
            echo "Usage: ./do/deploy [--force] [--force-ic]"
            echo ""
            echo "Options:"
            echo " --force Create a new deployment, even if one already exists."
            echo " --force-ic Recreate the endpoint configuration on the existing endpoint."
            echo ""
            echo "Without flags, deploy resumes from the last run."
            exit 0
            ;;
        *)
            # A usage error is a diagnostic: keep it off stdout.
            echo "❌ Unknown option: $1" >&2
            echo " Run ./do/deploy --help for usage." >&2
            exit 1
            ;;
    esac
    # Every arm that falls through consumed exactly one argument.
    shift
done
|
|
37
|
+
|
|
38
|
+
# Source configuration
|
|
39
|
+
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
40
|
+
source "${SCRIPT_DIR}/config"
|
|
41
|
+
|
|
42
|
+
echo "🚀 Deploying Marketplace Model Package"
|
|
43
|
+
echo " Project: ${PROJECT_NAME}"
|
|
44
|
+
echo " Deployment config: marketplace"
|
|
45
|
+
echo " Region: ${AWS_REGION}"
|
|
46
|
+
echo " Model package: ${MODEL_PACKAGE_ARN}"
|
|
47
|
+
echo " Deployment target: ${DEPLOYMENT_TARGET}"
|
|
48
|
+
echo " Instance type: ${INSTANCE_TYPE}"
|
|
49
|
+
<% if (deploymentTarget === 'async-inference') { %>
|
|
50
|
+
echo " S3 output: ${ASYNC_S3_OUTPUT_PATH}"
|
|
51
|
+
echo " SNS success: ${ASYNC_SNS_SUCCESS_TOPIC}"
|
|
52
|
+
echo " SNS error: ${ASYNC_SNS_ERROR_TOPIC}"
|
|
53
|
+
<% if (asyncMaxConcurrentInvocations) { %>
|
|
54
|
+
echo " Max concurrent: ${ASYNC_MAX_CONCURRENT_INVOCATIONS}"
|
|
55
|
+
<% } %>
|
|
56
|
+
<% } else if (deploymentTarget === 'batch-transform') { %>
|
|
57
|
+
echo " Instance count: ${BATCH_INSTANCE_COUNT}"
|
|
58
|
+
echo " S3 input: ${BATCH_INPUT_PATH}"
|
|
59
|
+
echo " S3 output: ${BATCH_OUTPUT_PATH}"
|
|
60
|
+
echo " Split type: ${BATCH_SPLIT_TYPE}"
|
|
61
|
+
echo " Strategy: ${BATCH_STRATEGY}"
|
|
62
|
+
<% } %>
|
|
63
|
+
|
|
64
|
+
# Check AWS credentials and resolve the caller's account ID.
# A single STS call both proves the credentials work and yields the account
# ID, so there is no second, unguarded call that could abort under `set -e`
# without a helpful message.
echo "🔍 Validating AWS credentials..."
if ! AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text 2>/dev/null) ||
    [ -z "${AWS_ACCOUNT_ID}" ]; then
    echo "❌ AWS credentials not configured"
    echo " Run: aws configure"
    echo " Or set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables"
    exit 4
fi

echo "✅ AWS credentials validated (Account: ${AWS_ACCOUNT_ID})"
|
|
75
|
+
|
|
76
|
+
# Source shared helpers
|
|
77
|
+
source "${SCRIPT_DIR}/lib/wait.sh"
|
|
78
|
+
source "${SCRIPT_DIR}/lib/endpoint-config.sh"
|
|
79
|
+
|
|
80
|
+
# Validate execution role ARN: abort with exit status 3 and a usage hint when
# ROLE_ARN is unset or empty; otherwise report which role will be used.
if [ -z "${ROLE_ARN:-}" ]; then
    printf '%s\n' \
        "❌ Execution role ARN not provided" \
        "" \
        "Usage:" \
        " export ROLE_ARN=arn:aws:iam::ACCOUNT_ID:role/YOUR_ROLE" \
        " ./do/deploy" \
        "" \
        "Or set ROLE_ARN in do/config" \
        "" \
        "The execution role must have permissions for:" \
        " • SageMaker model and endpoint management" \
        " • Access to the Marketplace model package" \
        " • CloudWatch Logs"
    exit 3
fi

printf '%s\n' " Using execution role: ${ROLE_ARN}"
|
|
98
|
+
|
|
99
|
+
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
100
|
+
# ============================================================
|
|
101
|
+
# SageMaker Real-Time Inference Deployment (Model-Based)
|
|
102
|
+
# Marketplace packages use the classic model-based flow:
|
|
103
|
+
# CreateModel(ModelPackageName) → CreateEndpointConfig → CreateEndpoint
|
|
104
|
+
# ============================================================
|
|
105
|
+
|
|
106
|
+
# ============================================================
# Idempotency: check for existing deployment from a previous run
# ============================================================
# SKIP_TO stays empty when new resources must be created, and becomes
# "wait_endpoint" when a previous endpoint is still being provisioned.
SKIP_TO=""

if [ "${FORCE_NEW}" = true ]; then
    echo "🔄 --force: ignoring previous deployment, creating new resources."
elif [ -n "${ENDPOINT_NAME:-}" ]; then
    echo "🔍 Checking for existing deployment: ${ENDPOINT_NAME}"

    EP_STATUS=$(_get_endpoint_status "${ENDPOINT_NAME}")

    if [ "${EP_STATUS}" = "InService" ]; then
        # Already live: report and stop without touching anything.
        printf '%s\n' \
            "✅ Endpoint already InService: ${ENDPOINT_NAME}" \
            "" \
            "📋 Deployment is already live. Nothing to do." \
            " Endpoint: ${ENDPOINT_NAME}" \
            "" \
            "🧪 Test your endpoint:" \
            " ./do/test" \
            "" \
            "🧹 Clean up when done:" \
            " ./do/clean endpoint"
        exit 0
    elif [ "${EP_STATUS}" = "Creating" ] || [ "${EP_STATUS}" = "Updating" ]; then
        # Still provisioning: skip creation and resume at the wait step.
        echo "⏳ Endpoint still ${EP_STATUS}: ${ENDPOINT_NAME}"
        SKIP_TO="wait_endpoint"
    elif [ "${EP_STATUS}" = "Failed" ]; then
        printf '%s\n' \
            "⚠️ Previous endpoint failed: ${ENDPOINT_NAME}" \
            " Creating a new deployment. Clean up the failed endpoint with:" \
            " ./do/clean endpoint" \
            ""
    elif [ -z "${EP_STATUS}" ]; then
        echo " Previous endpoint not found (may have been cleaned up). Creating new deployment."
    else
        echo " Endpoint in unexpected state: ${EP_STATUS}. Creating new deployment."
    fi
fi
|
|
150
|
+
|
|
151
|
+
# ============================================================
# Create resources (skip if resuming from wait)
# ============================================================
# Runs only when SKIP_TO is empty. Creates, in order: the SageMaker model
# (from the Marketplace model package), the endpoint configuration, and the
# endpoint. Resource names share one timestamp suffix and are persisted via
# _update_config_var before creation starts. Any failed AWS call exits 4.
if [ -z "${SKIP_TO}" ]; then
    TIMESTAMP=$(date +%s)
    MODEL_NAME_SM="${PROJECT_NAME}-mkt-model-${TIMESTAMP}"
    ENDPOINT_CONFIG_NAME="${PROJECT_NAME}-mkt-epc-${TIMESTAMP}"
    ENDPOINT_NAME="${PROJECT_NAME}-mkt-ep-${TIMESTAMP}"

    _update_config_var "ENDPOINT_NAME" "${ENDPOINT_NAME}"
    _update_config_var "ENDPOINT_CONFIG_NAME" "${ENDPOINT_CONFIG_NAME}"
    _update_config_var "SAGEMAKER_MODEL_NAME" "${MODEL_NAME_SM}"
    # Keep the in-shell value in step with what was just persisted, so later
    # output that prefers SAGEMAKER_MODEL_NAME does not show a stale name
    # loaded from a previous run's config.
    SAGEMAKER_MODEL_NAME="${MODEL_NAME_SM}"

    # Step 1: Create SageMaker model from Marketplace model package
    echo "📦 Creating SageMaker model from Marketplace package: ${MODEL_NAME_SM}"
    if ! aws sagemaker create-model \
        --model-name "${MODEL_NAME_SM}" \
        --primary-container "{\"ModelPackageName\":\"${MODEL_PACKAGE_ARN}\"}" \
        --execution-role-arn "${ROLE_ARN}" \
        --region "${AWS_REGION}"; then

        echo "❌ Failed to create model from package ARN. Check IAM permissions and subscription status."
        echo " Check that:"
        echo " • The model package ARN is correct: ${MODEL_PACKAGE_ARN}"
        echo " • Your Marketplace subscription is active"
        echo " • The execution role has permission to access the model package"
        exit 4
    fi

    echo "✅ SageMaker model created: ${MODEL_NAME_SM}"

    # Record model in manifest (non-blocking).
    # The helper is addressed via SCRIPT_DIR rather than ./do/ so the record
    # is not silently dropped when the script is run from another directory.
    MODEL_ARN="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:model/${MODEL_NAME_SM}"
    "${SCRIPT_DIR}/manifest" add \
        --type sagemaker-model \
        --id "${MODEL_ARN}" \
        --project "${PROJECT_NAME}" \
        --meta "{\"modelName\":\"${MODEL_NAME_SM}\",\"modelPackageArn\":\"${MODEL_PACKAGE_ARN}\",\"region\":\"${AWS_REGION}\"}" \
        2>/dev/null || true

    # Step 2: Create endpoint configuration
    # The single production variant references the model by name (model-based
    # flow), so no execution role is passed to create-endpoint-config.
    VARIANT_JSON="[{\"VariantName\":\"AllTraffic\",\"ModelName\":\"${MODEL_NAME_SM}\",\"InstanceType\":\"${INSTANCE_TYPE}\",\"InitialInstanceCount\":1}]"

    echo "⚙️ Creating endpoint configuration: ${ENDPOINT_CONFIG_NAME}"
    if ! aws sagemaker create-endpoint-config \
        --endpoint-config-name "${ENDPOINT_CONFIG_NAME}" \
        --production-variants "${VARIANT_JSON}" \
        --region "${AWS_REGION}"; then

        echo "❌ Failed to create endpoint configuration"
        echo " Check that:"
        echo " • The instance type is valid: ${INSTANCE_TYPE}"
        echo " • The instance type is available in region: ${AWS_REGION}"
        echo " • You have sufficient service quota for the instance type"
        exit 4
    fi

    echo "✅ Endpoint configuration created: ${ENDPOINT_CONFIG_NAME}"

    # Record endpoint config in manifest (non-blocking)
    ENDPOINT_CONFIG_ARN="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:endpoint-config/${ENDPOINT_CONFIG_NAME}"
    "${SCRIPT_DIR}/manifest" add \
        --type sagemaker-endpoint-config \
        --id "${ENDPOINT_CONFIG_ARN}" \
        --project "${PROJECT_NAME}" \
        --meta "{\"endpointConfigName\":\"${ENDPOINT_CONFIG_NAME}\",\"instanceType\":\"${INSTANCE_TYPE}\",\"region\":\"${AWS_REGION}\"}" \
        2>/dev/null || true

    # Step 3: Create endpoint
    echo "🚀 Creating endpoint: ${ENDPOINT_NAME}"
    if ! aws sagemaker create-endpoint \
        --endpoint-name "${ENDPOINT_NAME}" \
        --endpoint-config-name "${ENDPOINT_CONFIG_NAME}" \
        --region "${AWS_REGION}"; then

        echo "❌ Failed to create endpoint"
        echo " Check that:"
        echo " • Your IAM credentials have sagemaker:CreateEndpoint permission"
        echo " • You have sufficient service quota in region: ${AWS_REGION}"
        exit 4
    fi

    echo "✅ Endpoint creation initiated: ${ENDPOINT_NAME}"

    # Record endpoint in manifest (non-blocking)
    ENDPOINT_ARN="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:endpoint/${ENDPOINT_NAME}"
    "${SCRIPT_DIR}/manifest" add \
        --type sagemaker-endpoint \
        --id "${ENDPOINT_ARN}" \
        --project "${PROJECT_NAME}" \
        --meta "{\"endpointName\":\"${ENDPOINT_NAME}\",\"instanceType\":\"${INSTANCE_TYPE}\",\"region\":\"${AWS_REGION}\"}" \
        2>/dev/null || true
fi
|
|
245
|
+
|
|
246
|
+
# ============================================================
# Wait for endpoint
# ============================================================
# Reached both after a fresh create (SKIP_TO empty) and when resuming a
# deployment that is still provisioning (SKIP_TO=wait_endpoint).
case "${SKIP_TO}" in
    ""|wait_endpoint)
        printf '%s\n' \
            "⏳ Waiting for endpoint to reach InService status..." \
            " This may take several minutes..." \
            " If this times out, re-run ./do/deploy to resume."

        wait_endpoint "${ENDPOINT_NAME}"
        ;;
esac
|
|
256
|
+
|
|
257
|
+
echo "✅ Deployment complete!"
|
|
258
|
+
echo ""
|
|
259
|
+
echo "📋 Deployment Details:"
|
|
260
|
+
echo " Endpoint: ${ENDPOINT_NAME}"
|
|
261
|
+
echo " Endpoint Config: ${ENDPOINT_CONFIG_NAME}"
|
|
262
|
+
echo " Model: ${SAGEMAKER_MODEL_NAME:-${MODEL_NAME_SM:-N/A}}"
|
|
263
|
+
echo " Model Package: ${MODEL_PACKAGE_ARN}"
|
|
264
|
+
echo " Region: ${AWS_REGION}"
|
|
265
|
+
echo " Instance Type: ${INSTANCE_TYPE}"
|
|
266
|
+
echo ""
|
|
267
|
+
echo "📋 What's next?"
|
|
268
|
+
echo " • Test your endpoint: ./do/test"
|
|
269
|
+
<% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
|
|
270
|
+
echo " • Benchmark performance: ./do/benchmark"
|
|
271
|
+
<% } %>
|
|
272
|
+
echo " • View endpoint status: ./do/status"
|
|
273
|
+
echo " • Register this deployment: ./do/register"
|
|
274
|
+
echo " • View logs: ./do/logs"
|
|
275
|
+
echo " • Clean up when done: ./do/clean endpoint"
|
|
276
|
+
|
|
277
|
+
<% } else if (deploymentTarget === 'async-inference') { %>
|
|
278
|
+
# ============================================================
|
|
279
|
+
# SageMaker Async Inference Deployment (Model-Based)
|
|
280
|
+
# Marketplace packages use: CreateModel(ModelPackageName) → CreateEndpointConfig(AsyncInferenceConfig) → CreateEndpoint
|
|
281
|
+
# ============================================================
|
|
282
|
+
|
|
283
|
+
# ============================================================
|
|
284
|
+
# Bootstrap async infrastructure (S3 bucket + SNS topics)
|
|
285
|
+
# ============================================================
|
|
286
|
+
|
|
287
|
+
# Extract bucket name from S3 output path: drop the first "s3://" occurrence,
# then keep everything before the first "/".
ASYNC_S3_BUCKET="${ASYNC_S3_OUTPUT_PATH/s3:\/\//}"
ASYNC_S3_BUCKET="${ASYNC_S3_BUCKET%%/*}"
|
|
289
|
+
|
|
290
|
+
<% if (!asyncS3OutputPath) { %>
|
|
291
|
+
# Bootstrap default S3 bucket (check-and-create).
# head-bucket is used purely as an existence probe, so both of its output
# streams are discarded (the CLI can print bucket metadata on success).
echo "🔍 Checking if S3 bucket exists: ${ASYNC_S3_BUCKET}"
if aws s3api head-bucket --bucket "${ASYNC_S3_BUCKET}" --region "${AWS_REGION}" >/dev/null 2>&1; then
    echo "✅ S3 bucket exists: ${ASYNC_S3_BUCKET}"
else
    echo "📦 Creating S3 bucket: ${ASYNC_S3_BUCKET}"
    ASYNC_CREATE_BUCKET_ARGS=(--bucket "${ASYNC_S3_BUCKET}" --region "${AWS_REGION}")
    # A LocationConstraint is passed for every region except us-east-1,
    # which is created without one.
    if [ "${AWS_REGION}" != "us-east-1" ]; then
        ASYNC_CREATE_BUCKET_ARGS+=(--create-bucket-configuration "LocationConstraint=${AWS_REGION}")
    fi
    if ! aws s3api create-bucket "${ASYNC_CREATE_BUCKET_ARGS[@]}"; then
        echo "❌ Failed to create S3 bucket: ${ASYNC_S3_BUCKET}"
        exit 4
    fi
    echo "✅ S3 bucket created: ${ASYNC_S3_BUCKET}"
fi
|
|
315
|
+
<% } else { %>
|
|
316
|
+
# Custom S3 output path provided — skip bucket creation
|
|
317
|
+
echo "✅ Using custom S3 output path: ${ASYNC_S3_OUTPUT_PATH}"
|
|
318
|
+
<% } %>
|
|
319
|
+
|
|
320
|
+
# Extract topic name from SNS success topic ARN (the text after the last ':')
ASYNC_SNS_SUCCESS_TOPIC_NAME="${ASYNC_SNS_SUCCESS_TOPIC##*:}"
|
|
322
|
+
|
|
323
|
+
<% if (!asyncSnsSuccessTopic) { %>
|
|
324
|
+
# Bootstrap default SNS success topic (check-and-create).
# get-topic-attributes is only an existence probe; its stdout is discarded as
# well as stderr so the topic's attribute JSON is not dumped to the terminal
# when the topic already exists.
echo "🔍 Checking if SNS success topic exists: ${ASYNC_SNS_SUCCESS_TOPIC_NAME}"
if aws sns get-topic-attributes --topic-arn "${ASYNC_SNS_SUCCESS_TOPIC}" --region "${AWS_REGION}" >/dev/null 2>&1; then
    echo "✅ SNS success topic exists: ${ASYNC_SNS_SUCCESS_TOPIC_NAME}"
else
    echo "📦 Creating SNS success topic: ${ASYNC_SNS_SUCCESS_TOPIC_NAME}"
    if ! aws sns create-topic \
        --name "${ASYNC_SNS_SUCCESS_TOPIC_NAME}" \
        --region "${AWS_REGION}" > /dev/null; then
        echo "❌ Failed to create SNS success topic"
        exit 4
    fi
    echo "✅ SNS success topic created: ${ASYNC_SNS_SUCCESS_TOPIC_NAME}"
fi
|
|
338
|
+
<% } else { %>
|
|
339
|
+
# Custom SNS success topic ARN provided — skip topic creation
|
|
340
|
+
echo "✅ Using custom SNS success topic: ${ASYNC_SNS_SUCCESS_TOPIC}"
|
|
341
|
+
<% } %>
|
|
342
|
+
|
|
343
|
+
# Extract topic name from SNS error topic ARN (the text after the last ':')
ASYNC_SNS_ERROR_TOPIC_NAME="${ASYNC_SNS_ERROR_TOPIC##*:}"
|
|
345
|
+
|
|
346
|
+
<% if (!asyncSnsErrorTopic) { %>
|
|
347
|
+
# Bootstrap default SNS error topic (check-and-create).
# get-topic-attributes is only an existence probe; its stdout is discarded as
# well as stderr so the topic's attribute JSON is not dumped to the terminal
# when the topic already exists.
echo "🔍 Checking if SNS error topic exists: ${ASYNC_SNS_ERROR_TOPIC_NAME}"
if aws sns get-topic-attributes --topic-arn "${ASYNC_SNS_ERROR_TOPIC}" --region "${AWS_REGION}" >/dev/null 2>&1; then
    echo "✅ SNS error topic exists: ${ASYNC_SNS_ERROR_TOPIC_NAME}"
else
    echo "📦 Creating SNS error topic: ${ASYNC_SNS_ERROR_TOPIC_NAME}"
    if ! aws sns create-topic \
        --name "${ASYNC_SNS_ERROR_TOPIC_NAME}" \
        --region "${AWS_REGION}" > /dev/null; then
        echo "❌ Failed to create SNS error topic"
        exit 4
    fi
    echo "✅ SNS error topic created: ${ASYNC_SNS_ERROR_TOPIC_NAME}"
fi
|
|
361
|
+
<% } else { %>
|
|
362
|
+
# Custom SNS error topic ARN provided — skip topic creation
|
|
363
|
+
echo "✅ Using custom SNS error topic: ${ASYNC_SNS_ERROR_TOPIC}"
|
|
364
|
+
<% } %>
|
|
365
|
+
|
|
366
|
+
# ============================================================
# Idempotency: check for existing deployment from a previous run
# ============================================================
# SKIP_TO stays empty when new resources must be created, and becomes
# "wait_endpoint" when a previous endpoint is still being provisioned.
SKIP_TO=""

if [ "${FORCE_NEW}" = true ]; then
    echo "🔄 --force: ignoring previous deployment, creating new resources."
elif [ -n "${ENDPOINT_NAME:-}" ]; then
    echo "🔍 Checking for existing deployment: ${ENDPOINT_NAME}"

    EP_STATUS=$(_get_endpoint_status "${ENDPOINT_NAME}")

    if [ "${EP_STATUS}" = "InService" ]; then
        # Already live: report and stop without touching anything.
        printf '%s\n' \
            "✅ Async endpoint already InService: ${ENDPOINT_NAME}" \
            "" \
            "📋 Deployment is already live. Nothing to do." \
            " Endpoint: ${ENDPOINT_NAME}" \
            "" \
            "🧪 Test your async endpoint:" \
            " ./do/test" \
            "" \
            "🧹 Clean up when done:" \
            " ./do/clean endpoint"
        exit 0
    elif [ "${EP_STATUS}" = "Creating" ] || [ "${EP_STATUS}" = "Updating" ]; then
        # Still provisioning: skip creation and resume at the wait step.
        echo "⏳ Endpoint still ${EP_STATUS}: ${ENDPOINT_NAME}"
        SKIP_TO="wait_endpoint"
    elif [ "${EP_STATUS}" = "Failed" ]; then
        printf '%s\n' \
            "⚠️ Previous endpoint failed: ${ENDPOINT_NAME}" \
            " Creating a new deployment. Clean up the failed endpoint with:" \
            " ./do/clean endpoint" \
            ""
    elif [ -z "${EP_STATUS}" ]; then
        echo " Previous endpoint not found (may have been cleaned up). Creating new deployment."
    else
        echo " Endpoint in unexpected state: ${EP_STATUS}. Creating new deployment."
    fi
fi
|
|
410
|
+
|
|
411
|
+
# ============================================================
# Create async resources (skip if resuming from wait)
# ============================================================
# Runs only when SKIP_TO is empty. Creates, in order: the SageMaker model
# (from the Marketplace model package), an endpoint configuration carrying an
# AsyncInferenceConfig, and the endpoint. Resource names share one timestamp
# suffix and are persisted via _update_config_var before creation starts.
# Any failed AWS call exits 4.
if [ -z "${SKIP_TO}" ]; then
    TIMESTAMP=$(date +%s)
    MODEL_NAME_SM="${PROJECT_NAME}-mkt-async-model-${TIMESTAMP}"
    ENDPOINT_CONFIG_NAME="${PROJECT_NAME}-mkt-async-epc-${TIMESTAMP}"
    ENDPOINT_NAME="${PROJECT_NAME}-mkt-async-ep-${TIMESTAMP}"

    _update_config_var "ENDPOINT_NAME" "${ENDPOINT_NAME}"
    _update_config_var "ENDPOINT_CONFIG_NAME" "${ENDPOINT_CONFIG_NAME}"
    _update_config_var "SAGEMAKER_MODEL_NAME" "${MODEL_NAME_SM}"
    # Keep the in-shell value in step with what was just persisted, so later
    # output that prefers SAGEMAKER_MODEL_NAME does not show a stale name
    # loaded from a previous run's config.
    SAGEMAKER_MODEL_NAME="${MODEL_NAME_SM}"

    # Step 1: Create SageMaker model from Marketplace model package
    echo "📦 Creating SageMaker model from Marketplace package: ${MODEL_NAME_SM}"
    if ! aws sagemaker create-model \
        --model-name "${MODEL_NAME_SM}" \
        --primary-container "{\"ModelPackageName\":\"${MODEL_PACKAGE_ARN}\"}" \
        --execution-role-arn "${ROLE_ARN}" \
        --region "${AWS_REGION}"; then

        echo "❌ Failed to create model from package ARN. Check IAM permissions and subscription status."
        echo " Check that:"
        echo " • The model package ARN is correct: ${MODEL_PACKAGE_ARN}"
        echo " • Your Marketplace subscription is active"
        echo " • The execution role has permission to access the model package"
        exit 4
    fi

    echo "✅ SageMaker model created: ${MODEL_NAME_SM}"

    # Record model in manifest (non-blocking).
    # The helper is addressed via SCRIPT_DIR rather than ./do/ so the record
    # is not silently dropped when the script is run from another directory.
    MODEL_ARN="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:model/${MODEL_NAME_SM}"
    "${SCRIPT_DIR}/manifest" add \
        --type sagemaker-model \
        --id "${MODEL_ARN}" \
        --project "${PROJECT_NAME}" \
        --meta "{\"modelName\":\"${MODEL_NAME_SM}\",\"modelPackageArn\":\"${MODEL_PACKAGE_ARN}\",\"region\":\"${AWS_REGION}\"}" \
        2>/dev/null || true

    # Step 2: Build production variant and AsyncInferenceConfig
    VARIANT_JSON="[{\"VariantName\":\"AllTraffic\",\"ModelName\":\"${MODEL_NAME_SM}\",\"InstanceType\":\"${INSTANCE_TYPE}\",\"InitialInstanceCount\":1}]"

    # The JSON object is assembled in pieces: OutputConfig always, ClientConfig
    # only when a concurrency limit is configured, then the closing brace.
    ASYNC_CONFIG="{\"OutputConfig\":{\"S3OutputPath\":\"${ASYNC_S3_OUTPUT_PATH}\",\"NotificationConfig\":{\"SuccessTopic\":\"${ASYNC_SNS_SUCCESS_TOPIC}\",\"ErrorTopic\":\"${ASYNC_SNS_ERROR_TOPIC}\"}}"
    if [ -n "${ASYNC_MAX_CONCURRENT_INVOCATIONS:-}" ]; then
        ASYNC_CONFIG="${ASYNC_CONFIG},\"ClientConfig\":{\"MaxConcurrentInvocationsPerInstance\":${ASYNC_MAX_CONCURRENT_INVOCATIONS}}"
    fi
    ASYNC_CONFIG="${ASYNC_CONFIG}}"

    # Step 3: Create endpoint configuration with AsyncInferenceConfig
    echo "⚙️ Creating async endpoint configuration: ${ENDPOINT_CONFIG_NAME}"
    if ! aws sagemaker create-endpoint-config \
        --endpoint-config-name "${ENDPOINT_CONFIG_NAME}" \
        --production-variants "${VARIANT_JSON}" \
        --async-inference-config "${ASYNC_CONFIG}" \
        --region "${AWS_REGION}"; then

        echo "❌ Failed to create async endpoint configuration"
        echo " Check that:"
        echo " • The S3 output path is accessible: ${ASYNC_S3_OUTPUT_PATH}"
        echo " • The IAM role has s3:PutObject permission on the output path"
        echo " • The instance type is valid: ${INSTANCE_TYPE}"
        echo " • You have sufficient service quota for the instance type"
        exit 4
    fi

    echo "✅ Async endpoint configuration created: ${ENDPOINT_CONFIG_NAME}"

    # Record endpoint config in manifest (non-blocking)
    ENDPOINT_CONFIG_ARN="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:endpoint-config/${ENDPOINT_CONFIG_NAME}"
    "${SCRIPT_DIR}/manifest" add \
        --type sagemaker-endpoint-config \
        --id "${ENDPOINT_CONFIG_ARN}" \
        --project "${PROJECT_NAME}" \
        --meta "{\"endpointConfigName\":\"${ENDPOINT_CONFIG_NAME}\",\"instanceType\":\"${INSTANCE_TYPE}\",\"region\":\"${AWS_REGION}\"}" \
        2>/dev/null || true

    # Step 4: Create endpoint
    echo "🚀 Creating async endpoint: ${ENDPOINT_NAME}"
    if ! aws sagemaker create-endpoint \
        --endpoint-name "${ENDPOINT_NAME}" \
        --endpoint-config-name "${ENDPOINT_CONFIG_NAME}" \
        --region "${AWS_REGION}"; then

        echo "❌ Failed to create async endpoint"
        echo " Check that:"
        echo " • Your IAM credentials have sagemaker:CreateEndpoint permission"
        echo " • You have sufficient service quota in region: ${AWS_REGION}"
        exit 4
    fi

    echo "✅ Async endpoint creation initiated: ${ENDPOINT_NAME}"

    # Record endpoint in manifest (non-blocking)
    ENDPOINT_ARN="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:endpoint/${ENDPOINT_NAME}"
    "${SCRIPT_DIR}/manifest" add \
        --type sagemaker-endpoint \
        --id "${ENDPOINT_ARN}" \
        --project "${PROJECT_NAME}" \
        --meta "{\"endpointName\":\"${ENDPOINT_NAME}\",\"instanceType\":\"${INSTANCE_TYPE}\",\"region\":\"${AWS_REGION}\"}" \
        2>/dev/null || true
fi
|
|
513
|
+
|
|
514
|
+
# ============================================================
# Wait for endpoint
# ============================================================
# Reached both after a fresh create (SKIP_TO empty) and when resuming a
# deployment that is still provisioning (SKIP_TO=wait_endpoint).
case "${SKIP_TO}" in
    ""|wait_endpoint)
        printf '%s\n' \
            "⏳ Waiting for async endpoint to reach InService status..." \
            " This may take several minutes..." \
            " If this times out, re-run ./do/deploy to resume."

        wait_endpoint "${ENDPOINT_NAME}"
        ;;
esac
|
|
524
|
+
|
|
525
|
+
echo "✅ Async deployment complete!"
|
|
526
|
+
echo ""
|
|
527
|
+
echo "📋 Deployment Details:"
|
|
528
|
+
echo " Endpoint: ${ENDPOINT_NAME}"
|
|
529
|
+
echo " Endpoint Config: ${ENDPOINT_CONFIG_NAME}"
|
|
530
|
+
echo " Model: ${SAGEMAKER_MODEL_NAME:-${MODEL_NAME_SM:-N/A}}"
|
|
531
|
+
echo " Model Package: ${MODEL_PACKAGE_ARN}"
|
|
532
|
+
echo " Region: ${AWS_REGION}"
|
|
533
|
+
echo " Instance Type: ${INSTANCE_TYPE}"
|
|
534
|
+
echo " S3 Output: ${ASYNC_S3_OUTPUT_PATH}"
|
|
535
|
+
echo " SNS Success: ${ASYNC_SNS_SUCCESS_TOPIC}"
|
|
536
|
+
echo " SNS Error: ${ASYNC_SNS_ERROR_TOPIC}"
|
|
537
|
+
echo ""
|
|
538
|
+
echo "📋 What's next?"
|
|
539
|
+
echo " • Test your async endpoint: ./do/test"
|
|
540
|
+
echo " • Check async output: aws s3 ls ${ASYNC_S3_OUTPUT_PATH}"
|
|
541
|
+
<% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
|
|
542
|
+
echo " • Benchmark performance: ./do/benchmark"
|
|
543
|
+
<% } %>
|
|
544
|
+
echo " • Register this deployment: ./do/register"
|
|
545
|
+
echo " • View logs: ./do/logs"
|
|
546
|
+
echo " • Clean up when done: ./do/clean endpoint"
|
|
547
|
+
|
|
548
|
+
<% } else if (deploymentTarget === 'batch-transform') { %>
|
|
549
|
+
# ============================================================
|
|
550
|
+
# SageMaker Batch Transform Deployment
|
|
551
|
+
# Marketplace packages use: CreateModel(ModelPackageName) → CreateTransformJob
|
|
552
|
+
# ============================================================
|
|
553
|
+
|
|
554
|
+
# Validate S3 input path: must be set and must begin with s3://.
# Either failure exits with status 3.
case "${BATCH_INPUT_PATH:-}" in
    "")
        printf '%s\n' \
            "❌ S3 input path not provided" \
            "" \
            "Set BATCH_INPUT_PATH in do/config or provide via CLI:" \
            " export BATCH_INPUT_PATH=s3://my-bucket/input/" \
            " ./do/deploy"
        exit 3
        ;;
    s3://*)
        ;;
    *)
        printf '%s\n' \
            "❌ S3 input path must start with s3://" \
            " Current value: ${BATCH_INPUT_PATH}"
        exit 3
        ;;
esac

# Validate S3 output path: same two rules as the input path.
case "${BATCH_OUTPUT_PATH:-}" in
    "")
        printf '%s\n' \
            "❌ S3 output path not provided" \
            "" \
            "Set BATCH_OUTPUT_PATH in do/config or provide via CLI:" \
            " export BATCH_OUTPUT_PATH=s3://my-bucket/output/" \
            " ./do/deploy"
        exit 3
        ;;
    s3://*)
        ;;
    *)
        printf '%s\n' \
            "❌ S3 output path must start with s3://" \
            " Current value: ${BATCH_OUTPUT_PATH}"
        exit 3
        ;;
esac
|
|
585
|
+
|
|
586
|
+
# ============================================================
# Bootstrap S3 buckets for batch transform
# ============================================================

# Bucket name = path with the first "s3://" removed, cut at the first "/".
BATCH_INPUT_BUCKET="${BATCH_INPUT_PATH/s3:\/\//}"
BATCH_INPUT_BUCKET="${BATCH_INPUT_BUCKET%%/*}"
BATCH_OUTPUT_BUCKET="${BATCH_OUTPUT_PATH/s3:\/\//}"
BATCH_OUTPUT_BUCKET="${BATCH_OUTPUT_BUCKET%%/*}"
|
|
592
|
+
|
|
593
|
+
<% if (!batchInputPath) { %>
|
|
594
|
+
# Bootstrap default S3 input bucket (check-and-create).
# head-bucket is used purely as an existence probe, so both of its output
# streams are discarded (the CLI can print bucket metadata on success).
echo "🔍 Checking if S3 input bucket exists: ${BATCH_INPUT_BUCKET}"
if aws s3api head-bucket --bucket "${BATCH_INPUT_BUCKET}" --region "${AWS_REGION}" >/dev/null 2>&1; then
    echo "✅ S3 input bucket exists: ${BATCH_INPUT_BUCKET}"
else
    echo "📦 Creating S3 input bucket: ${BATCH_INPUT_BUCKET}"
    BATCH_INPUT_CREATE_ARGS=(--bucket "${BATCH_INPUT_BUCKET}" --region "${AWS_REGION}")
    # A LocationConstraint is passed for every region except us-east-1,
    # which is created without one.
    if [ "${AWS_REGION}" != "us-east-1" ]; then
        BATCH_INPUT_CREATE_ARGS+=(--create-bucket-configuration "LocationConstraint=${AWS_REGION}")
    fi
    if ! aws s3api create-bucket "${BATCH_INPUT_CREATE_ARGS[@]}"; then
        echo "❌ Failed to create S3 input bucket: ${BATCH_INPUT_BUCKET}"
        exit 4
    fi
    echo "✅ S3 input bucket created: ${BATCH_INPUT_BUCKET}"
fi
|
|
618
|
+
|
|
619
|
+
# Upload sample input file if the input prefix is empty.
# The prefix is normalised to end in exactly one "/" before use, so a
# BATCH_INPUT_PATH without a trailing slash yields ".../input/sample.jsonl"
# rather than ".../inputsample.jsonl", and the emptiness check lists the
# prefix itself instead of sibling keys sharing the same leading text.
BATCH_INPUT_PREFIX="${BATCH_INPUT_PATH%/}/"
BATCH_SAMPLE_URI="${BATCH_INPUT_PREFIX}sample.jsonl"
# `|| true`: a failed or truncated listing is treated as "no objects found".
EXISTING_OBJECTS=$(aws s3 ls "${BATCH_INPUT_PREFIX}" --region "${AWS_REGION}" 2>/dev/null | head -1 || true)
if [ -z "${EXISTING_OBJECTS}" ]; then
    echo "📄 Uploading sample input file to ${BATCH_INPUT_PREFIX}"
    echo '{"inputs": "What is machine learning?", "parameters": {"max_new_tokens": 50}}' | aws s3 cp - "${BATCH_SAMPLE_URI}" --region "${AWS_REGION}"
    echo "✅ Sample input uploaded: ${BATCH_SAMPLE_URI}"
    echo " ⚠️ Replace this with your actual input data before running production jobs"
fi
|
|
627
|
+
<% } else { %>
|
|
628
|
+
# Custom S3 input path provided — skip bucket creation
|
|
629
|
+
echo "✅ Using custom S3 input path: ${BATCH_INPUT_PATH}"
|
|
630
|
+
<% } %>
|
|
631
|
+
|
|
632
|
+
<% if (!batchOutputPath) { %>
|
|
633
|
+
# Bootstrap default S3 output bucket (check-and-create, may be same as input)
|
|
634
|
+
if [ "${BATCH_OUTPUT_BUCKET}" != "${BATCH_INPUT_BUCKET}" ]; then
|
|
635
|
+
echo "🔍 Checking if S3 output bucket exists: ${BATCH_OUTPUT_BUCKET}"
|
|
636
|
+
if ! aws s3api head-bucket --bucket "${BATCH_OUTPUT_BUCKET}" --region "${AWS_REGION}" 2>/dev/null; then
|
|
637
|
+
echo "📦 Creating S3 output bucket: ${BATCH_OUTPUT_BUCKET}"
|
|
638
|
+
if [ "${AWS_REGION}" = "us-east-1" ]; then
|
|
639
|
+
if ! aws s3api create-bucket \
|
|
640
|
+
--bucket "${BATCH_OUTPUT_BUCKET}" \
|
|
641
|
+
--region "${AWS_REGION}"; then
|
|
642
|
+
echo "❌ Failed to create S3 output bucket: ${BATCH_OUTPUT_BUCKET}"
|
|
643
|
+
exit 4
|
|
644
|
+
fi
|
|
645
|
+
else
|
|
646
|
+
if ! aws s3api create-bucket \
|
|
647
|
+
--bucket "${BATCH_OUTPUT_BUCKET}" \
|
|
648
|
+
--region "${AWS_REGION}" \
|
|
649
|
+
--create-bucket-configuration LocationConstraint="${AWS_REGION}"; then
|
|
650
|
+
echo "❌ Failed to create S3 output bucket: ${BATCH_OUTPUT_BUCKET}"
|
|
651
|
+
exit 4
|
|
652
|
+
fi
|
|
653
|
+
fi
|
|
654
|
+
echo "✅ S3 output bucket created: ${BATCH_OUTPUT_BUCKET}"
|
|
655
|
+
else
|
|
656
|
+
echo "✅ S3 output bucket exists: ${BATCH_OUTPUT_BUCKET}"
|
|
657
|
+
fi
|
|
658
|
+
else
|
|
659
|
+
echo "✅ S3 output bucket same as input: ${BATCH_OUTPUT_BUCKET}"
|
|
660
|
+
fi
|
|
661
|
+
<% } else { %>
|
|
662
|
+
# Custom S3 output path provided — skip bucket creation
|
|
663
|
+
echo "✅ Using custom S3 output path: ${BATCH_OUTPUT_PATH}"
|
|
664
|
+
<% } %>
|
|
665
|
+
|
|
666
|
+
# ============================================================
|
|
667
|
+
# Check for previous transform job still running
|
|
668
|
+
# ============================================================
|
|
669
|
+
if [ "${FORCE_NEW}" != true ] && [ -n "${TRANSFORM_JOB_NAME:-}" ]; then
|
|
670
|
+
echo "🔍 Checking previous transform job: ${TRANSFORM_JOB_NAME}"
|
|
671
|
+
PREV_JOB_STATUS=$(aws sagemaker describe-transform-job \
|
|
672
|
+
--transform-job-name "${TRANSFORM_JOB_NAME}" \
|
|
673
|
+
--region "${AWS_REGION}" \
|
|
674
|
+
--query "TransformJobStatus" \
|
|
675
|
+
--output text 2>/dev/null || echo "")
|
|
676
|
+
|
|
677
|
+
case "${PREV_JOB_STATUS}" in
|
|
678
|
+
InProgress)
|
|
679
|
+
echo "⚠️ Previous transform job is still running: ${TRANSFORM_JOB_NAME}"
|
|
680
|
+
echo " Wait for it to complete, or stop it with:"
|
|
681
|
+
echo " aws sagemaker stop-transform-job --transform-job-name ${TRANSFORM_JOB_NAME} --region ${AWS_REGION}"
|
|
682
|
+
echo ""
|
|
683
|
+
echo " Use --force to create a new job anyway."
|
|
684
|
+
exit 4
|
|
685
|
+
;;
|
|
686
|
+
Completed)
|
|
687
|
+
echo "✅ Previous transform job completed: ${TRANSFORM_JOB_NAME}"
|
|
688
|
+
echo " Creating a new job. Results from the previous job are in:"
|
|
689
|
+
echo " ${BATCH_OUTPUT_PATH}"
|
|
690
|
+
echo ""
|
|
691
|
+
;;
|
|
692
|
+
*)
|
|
693
|
+
# Any other status (Failed, Stopped, Stopping) or an empty result because the job was not found or describe failed — proceed with new job
|
|
694
|
+
;;
|
|
695
|
+
esac
|
|
696
|
+
fi
|
|
697
|
+
|
|
698
|
+
# Generate unique names with timestamp
|
|
699
|
+
TIMESTAMP=$(date +%s)
|
|
700
|
+
MODEL_NAME_SM="${PROJECT_NAME}-mkt-batch-model-${TIMESTAMP}"
|
|
701
|
+
TRANSFORM_JOB_NAME="${PROJECT_NAME}-mkt-batch-job-${TIMESTAMP}"
|
|
702
|
+
|
|
703
|
+
_update_config_var "TRANSFORM_JOB_NAME" "${TRANSFORM_JOB_NAME}"
|
|
704
|
+
_update_config_var "SAGEMAKER_MODEL_NAME" "${MODEL_NAME_SM}"
|
|
705
|
+
|
|
706
|
+
# Step 1: Create SageMaker model from Marketplace model package
|
|
707
|
+
echo "📦 Creating SageMaker model from Marketplace package: ${MODEL_NAME_SM}"
|
|
708
|
+
if ! aws sagemaker create-model \
|
|
709
|
+
--model-name "${MODEL_NAME_SM}" \
|
|
710
|
+
--primary-container "{\"ModelPackageName\":\"${MODEL_PACKAGE_ARN}\"}" \
|
|
711
|
+
--execution-role-arn "${ROLE_ARN}" \
|
|
712
|
+
--region "${AWS_REGION}"; then
|
|
713
|
+
|
|
714
|
+
echo "❌ Failed to create model from package ARN. Check IAM permissions and subscription status."
|
|
715
|
+
echo " Check that:"
|
|
716
|
+
echo " • The model package ARN is correct: ${MODEL_PACKAGE_ARN}"
|
|
717
|
+
echo " • Your Marketplace subscription is active"
|
|
718
|
+
echo " • The execution role has permission to access the model package"
|
|
719
|
+
exit 4
|
|
720
|
+
fi
|
|
721
|
+
|
|
722
|
+
echo "✅ SageMaker model created: ${MODEL_NAME_SM}"
|
|
723
|
+
|
|
724
|
+
# Record model in manifest (non-blocking)
|
|
725
|
+
MODEL_ARN="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:model/${MODEL_NAME_SM}"
|
|
726
|
+
./do/manifest add \
|
|
727
|
+
--type sagemaker-model \
|
|
728
|
+
--id "${MODEL_ARN}" \
|
|
729
|
+
--project "${PROJECT_NAME}" \
|
|
730
|
+
--meta "{\"modelName\":\"${MODEL_NAME_SM}\",\"modelPackageArn\":\"${MODEL_PACKAGE_ARN}\",\"region\":\"${AWS_REGION}\"}" \
|
|
731
|
+
2>/dev/null || true
|
|
732
|
+
|
|
733
|
+
# Step 2: Build transform job JSON
|
|
734
|
+
TRANSFORM_JOB_JSON="{
|
|
735
|
+
\"TransformJobName\": \"${TRANSFORM_JOB_NAME}\",
|
|
736
|
+
\"ModelName\": \"${MODEL_NAME_SM}\",
|
|
737
|
+
\"TransformInput\": {
|
|
738
|
+
\"DataSource\": {
|
|
739
|
+
\"S3DataSource\": {
|
|
740
|
+
\"S3DataType\": \"S3Prefix\",
|
|
741
|
+
\"S3Uri\": \"${BATCH_INPUT_PATH}\"
|
|
742
|
+
}
|
|
743
|
+
},
|
|
744
|
+
\"ContentType\": \"application/json\",
|
|
745
|
+
\"SplitType\": \"${BATCH_SPLIT_TYPE}\"
|
|
746
|
+
},
|
|
747
|
+
\"TransformOutput\": {
|
|
748
|
+
\"S3OutputPath\": \"${BATCH_OUTPUT_PATH}\"
|
|
749
|
+
$(if [ "${BATCH_JOIN_SOURCE:-None}" = "Input" ]; then echo ",\"Accept\": \"application/json\", \"AssembleWith\": \"${BATCH_SPLIT_TYPE}\""; fi)
|
|
750
|
+
},
|
|
751
|
+
\"TransformResources\": {
|
|
752
|
+
\"InstanceType\": \"${INSTANCE_TYPE}\",
|
|
753
|
+
\"InstanceCount\": ${BATCH_INSTANCE_COUNT}
|
|
754
|
+
},
|
|
755
|
+
\"MaxConcurrentTransforms\": ${BATCH_MAX_CONCURRENT_TRANSFORMS:-1},
|
|
756
|
+
\"MaxPayloadInMB\": ${BATCH_MAX_PAYLOAD_IN_MB:-6},
|
|
757
|
+
\"BatchStrategy\": \"${BATCH_STRATEGY}\"
|
|
758
|
+
$(if [ "${BATCH_JOIN_SOURCE:-None}" = "Input" ]; then echo ",\"DataProcessing\": { \"JoinSource\": \"Input\" }"; fi)
|
|
759
|
+
}"
|
|
760
|
+
|
|
761
|
+
# Step 3: Create transform job
|
|
762
|
+
echo "🚀 Creating transform job: ${TRANSFORM_JOB_NAME}"
|
|
763
|
+
if ! aws sagemaker create-transform-job \
|
|
764
|
+
--cli-input-json "${TRANSFORM_JOB_JSON}" \
|
|
765
|
+
--region "${AWS_REGION}"; then
|
|
766
|
+
|
|
767
|
+
echo "❌ Failed to create transform job"
|
|
768
|
+
echo " Check that:"
|
|
769
|
+
echo " • The S3 input path exists and is accessible: ${BATCH_INPUT_PATH}"
|
|
770
|
+
echo " • The S3 output path is writable: ${BATCH_OUTPUT_PATH}"
|
|
771
|
+
echo " • The instance type is valid: ${INSTANCE_TYPE}"
|
|
772
|
+
echo " • You have sufficient service quota for the instance type"
|
|
773
|
+
exit 4
|
|
774
|
+
fi
|
|
775
|
+
|
|
776
|
+
echo "✅ Transform job created: ${TRANSFORM_JOB_NAME}"
|
|
777
|
+
|
|
778
|
+
# Record transform job in manifest (non-blocking)
|
|
779
|
+
TRANSFORM_JOB_ARN="arn:aws:sagemaker:${AWS_REGION}:${AWS_ACCOUNT_ID}:transform-job/${TRANSFORM_JOB_NAME}"
|
|
780
|
+
./do/manifest add \
|
|
781
|
+
--type sagemaker-transform-job \
|
|
782
|
+
--id "${TRANSFORM_JOB_ARN}" \
|
|
783
|
+
--project "${PROJECT_NAME}" \
|
|
784
|
+
--meta "{\"transformJobName\":\"${TRANSFORM_JOB_NAME}\",\"modelName\":\"${MODEL_NAME_SM}\",\"instanceType\":\"${INSTANCE_TYPE}\",\"region\":\"${AWS_REGION}\"}" \
|
|
785
|
+
2>/dev/null || true
|
|
786
|
+
|
|
787
|
+
# Step 4: Poll transform job status until completion or failure
|
|
788
|
+
echo "⏳ Waiting for transform job to complete..."
|
|
789
|
+
echo " This may take several minutes depending on dataset size..."
|
|
790
|
+
echo " If this times out, check status with:"
|
|
791
|
+
echo " aws sagemaker describe-transform-job --transform-job-name ${TRANSFORM_JOB_NAME} --region ${AWS_REGION}"
|
|
792
|
+
echo ""
|
|
793
|
+
|
|
794
|
+
while true; do
|
|
795
|
+
JOB_STATUS=$(aws sagemaker describe-transform-job \
|
|
796
|
+
--transform-job-name "${TRANSFORM_JOB_NAME}" \
|
|
797
|
+
--region "${AWS_REGION}" \
|
|
798
|
+
--query "TransformJobStatus" \
|
|
799
|
+
--output text 2>&1) || {
|
|
800
|
+
if echo "${JOB_STATUS}" | grep -qi "expired\|token"; then
|
|
801
|
+
echo ""
|
|
802
|
+
echo "⚠️ Credentials expired, but the transform job is still running."
|
|
803
|
+
echo " Refresh your credentials and check status with:"
|
|
804
|
+
echo " aws sagemaker describe-transform-job --transform-job-name ${TRANSFORM_JOB_NAME} --region ${AWS_REGION} --query TransformJobStatus"
|
|
805
|
+
exit 4
|
|
806
|
+
fi
|
|
807
|
+
echo "❌ Failed to describe transform job: ${TRANSFORM_JOB_NAME}"
|
|
808
|
+
echo " Error: ${JOB_STATUS}"
|
|
809
|
+
exit 4
|
|
810
|
+
}
|
|
811
|
+
|
|
812
|
+
case "${JOB_STATUS}" in
|
|
813
|
+
Completed)
|
|
814
|
+
echo "✅ Transform job completed successfully!"
|
|
815
|
+
break
|
|
816
|
+
;;
|
|
817
|
+
Failed)
|
|
818
|
+
FAILURE_REASON=$(aws sagemaker describe-transform-job \
|
|
819
|
+
--transform-job-name "${TRANSFORM_JOB_NAME}" \
|
|
820
|
+
--region "${AWS_REGION}" \
|
|
821
|
+
--query "FailureReason" \
|
|
822
|
+
--output text 2>/dev/null || echo "Unknown")
|
|
823
|
+
echo "❌ Transform job failed"
|
|
824
|
+
echo " Reason: ${FAILURE_REASON}"
|
|
825
|
+
echo ""
|
|
826
|
+
echo " Check CloudWatch Logs for details:"
|
|
827
|
+
echo " https://console.aws.amazon.com/cloudwatch/home?region=${AWS_REGION}#logsV2:log-groups/log-group//aws/sagemaker/TransformJobs"
|
|
828
|
+
exit 4
|
|
829
|
+
;;
|
|
830
|
+
Stopped)
|
|
831
|
+
echo "⚠️ Transform job was stopped"
|
|
832
|
+
exit 4
|
|
833
|
+
;;
|
|
834
|
+
InProgress)
|
|
835
|
+
echo " $(date +%H:%M:%S) Job status: InProgress..."
|
|
836
|
+
sleep 30
|
|
837
|
+
;;
|
|
838
|
+
*)
|
|
839
|
+
echo " $(date +%H:%M:%S) Job status: ${JOB_STATUS}..."
|
|
840
|
+
sleep 30
|
|
841
|
+
;;
|
|
842
|
+
esac
|
|
843
|
+
done
|
|
844
|
+
|
|
845
|
+
echo ""
|
|
846
|
+
echo "📋 Deployment Details:"
|
|
847
|
+
echo " Transform Job: ${TRANSFORM_JOB_NAME}"
|
|
848
|
+
echo " Model: ${MODEL_NAME_SM}"
|
|
849
|
+
echo " Model Package: ${MODEL_PACKAGE_ARN}"
|
|
850
|
+
echo " Region: ${AWS_REGION}"
|
|
851
|
+
echo " Instance Type: ${INSTANCE_TYPE}"
|
|
852
|
+
echo " Instance Count: ${BATCH_INSTANCE_COUNT}"
|
|
853
|
+
echo " S3 Input: ${BATCH_INPUT_PATH}"
|
|
854
|
+
echo " S3 Output: ${BATCH_OUTPUT_PATH}"
|
|
855
|
+
echo " Split Type: ${BATCH_SPLIT_TYPE}"
|
|
856
|
+
echo " Strategy: ${BATCH_STRATEGY}"
|
|
857
|
+
echo ""
|
|
858
|
+
|
|
859
|
+
# Download results locally
|
|
860
|
+
LOCAL_OUTPUT_DIR="${SCRIPT_DIR}/../batch-output"
|
|
861
|
+
mkdir -p "${LOCAL_OUTPUT_DIR}"
|
|
862
|
+
echo "📥 Downloading results to ${LOCAL_OUTPUT_DIR}/"
|
|
863
|
+
if aws s3 sync "${BATCH_OUTPUT_PATH}" "${LOCAL_OUTPUT_DIR}/" --region "${AWS_REGION}"; then
|
|
864
|
+
DOWNLOADED=$(ls -1 "${LOCAL_OUTPUT_DIR}" 2>/dev/null | wc -l | tr -d ' ')
|
|
865
|
+
echo "✅ Downloaded ${DOWNLOADED} file(s) to ${LOCAL_OUTPUT_DIR}/"
|
|
866
|
+
echo ""
|
|
867
|
+
|
|
868
|
+
# Display first output file preview
|
|
869
|
+
# Pick the first regular file only: `aws s3 sync` recreates nested prefixes as
# directories, and previewing a directory with head/wc below would fail.
# Yields an empty string (exit status 0) when the directory holds no regular files.
FIRST_FILE=$(cd "${LOCAL_OUTPUT_DIR}" 2>/dev/null && for entry in *; do if [ -f "${entry}" ]; then printf '%s\n' "${entry}"; break; fi; done)
|
|
870
|
+
if [ -n "${FIRST_FILE}" ]; then
|
|
871
|
+
echo "📄 Sample output (${FIRST_FILE}):"
|
|
872
|
+
head -5 "${LOCAL_OUTPUT_DIR}/${FIRST_FILE}"
|
|
873
|
+
LINES=$(wc -l < "${LOCAL_OUTPUT_DIR}/${FIRST_FILE}" | tr -d ' ')
|
|
874
|
+
if [ "${LINES}" -gt 5 ]; then
|
|
875
|
+
echo " ... (${LINES} total lines)"
|
|
876
|
+
fi
|
|
877
|
+
fi
|
|
878
|
+
else
|
|
879
|
+
echo "⚠️ Could not download output files"
|
|
880
|
+
fi
|
|
881
|
+
|
|
882
|
+
echo ""
|
|
883
|
+
echo "📋 What's next?"
|
|
884
|
+
# batch-output/ is a directory, so the hint lists it (cat on a directory fails).
echo " • View results: ls batch-output/"
|
|
885
|
+
echo " • Review results: ./do/test"
|
|
886
|
+
echo " • Register this deployment: ./do/register"
|
|
887
|
+
echo " • View logs: ./do/logs"
|
|
888
|
+
echo " • Clean up when done: ./do/clean"
|
|
889
|
+
|
|
890
|
+
<% } %>
|