@aws/ml-container-creator 0.7.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,453 @@
1
+ #!/bin/bash
2
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
# Strict mode: abort on a failing command (-e), on use of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Resolve the directory holding this script and load the "config" file that
# sits next to it. The rest of the script reads variables it does not define
# itself (e.g. PROJECT_NAME, AWS_REGION) -- presumably they come from there.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/config"
12
+
13
+ # ============================================================
14
+ # Marketplace Model Package Testing
15
+ # ============================================================
16
+ # This test script invokes the deployed marketplace endpoint.
17
+ # Local-mode testing is NOT available for marketplace model packages
18
+ # because there is no local container to run — the vendor provides
19
+ # the container image via AWS Marketplace.
20
+ # ============================================================
21
+
# Parse arguments

#######################################
# Print usage information for ./do/test.
# Arguments: none
# Outputs:   usage text on stdout
#######################################
show_help() {
  cat <<'EOF'
Usage: ./do/test [options]

Test the deployed marketplace model package endpoint.

Options:
 --help, -h Show this help message

⚠️ Local mode is NOT available for marketplace model packages.
 Marketplace models run on AWS infrastructure using the vendor's
 container image. There is no local container to test against.
 Deploy first with ./do/deploy, then run ./do/test to invoke the endpoint.

EOF
}

#######################################
# Handle the command-line flags, in the order given.
#   --help | -h    print usage on stdout and exit 0
#   --local | -l   explain on stderr that local mode is unsupported, exit 1
# Any other argument is reported on stderr and then ignored, so invocations
# that passed extra arguments before keep working, but a mistyped flag no
# longer goes unnoticed.
# Arguments: the script's positional parameters
# Returns:   0 when no flag caused an exit
#######################################
parse_args() {
  local arg
  for arg in "$@"; do
    case "${arg}" in
      --help|-h)
        show_help
        exit 0
        ;;
      --local|-l)
        # This is an error condition, so the explanation goes to stderr.
        printf '%s\n' >&2 \
          "❌ Local mode is NOT available for marketplace model packages." \
          "" \
          " Marketplace models use the vendor's container image hosted on AWS." \
          " There is no local container to build or run." \
          "" \
          " To test your deployment:" \
          " 1. Deploy the model: ./do/deploy" \
          " 2. Test the endpoint: ./do/test" \
          ""
        exit 1
        ;;
      *)
        printf '⚠️ Ignoring unrecognized argument: %s\n' "${arg}" >&2
        ;;
    esac
  done
}

parse_args "$@"
58
+
59
+ <% if (deploymentTarget === 'realtime-inference') { %>
# ============================================================
# Real-Time Inference Testing
# ============================================================

echo "🧪 Testing marketplace endpoint"
echo " Project: ${PROJECT_NAME}"
echo " Model package: ${MODEL_PACKAGE_ARN}"
echo " Region: ${AWS_REGION}"
echo ""

# Verify endpoint exists and is InService.
# The endpoint name falls back to the project name when not set explicitly.
ENDPOINT_NAME="${ENDPOINT_NAME:-${PROJECT_NAME}}"

echo "🔍 Test 1: Endpoint health check"
echo " Checking endpoint status..."

# stderr goes to its own file instead of being merged into the captured
# status: otherwise any warning the CLI prints on stderr would become part of
# ENDPOINT_STATUS and a healthy endpoint would fail the comparison below.
DESCRIBE_ERROR=$(mktemp)
if ! ENDPOINT_STATUS=$(aws sagemaker describe-endpoint \
    --endpoint-name "${ENDPOINT_NAME}" \
    --region "${AWS_REGION}" \
    --query 'EndpointStatus' \
    --output text 2>"${DESCRIBE_ERROR}"); then
  echo "❌ Endpoint not found or not accessible"
  echo " Error: $(cat "${DESCRIBE_ERROR}")"
  echo ""
  echo " Make sure you have deployed first: ./do/deploy"
  rm -f "${DESCRIBE_ERROR}"
  exit 1
fi
rm -f "${DESCRIBE_ERROR}"

if [ "${ENDPOINT_STATUS}" = "InService" ]; then
  echo "✅ Endpoint is InService"
else
  echo "❌ Endpoint is not InService (Status: ${ENDPOINT_STATUS})"
  exit 1
fi

echo ""

# Test 2: Inference request
echo "🔍 Test 2: Inference request"
99
+
100
+ # Determine content type and sample payload based on model package content types
101
+ <% if (typeof supportedContentTypes !== 'undefined' && supportedContentTypes && supportedContentTypes.length > 0) { %>
102
+ CONTENT_TYPE="<%= supportedContentTypes[0] %>"
103
+ <% } else { %>
104
+ CONTENT_TYPE="application/json"
105
+ <% } %>
106
+
# Create sample payload based on content type
case "${CONTENT_TYPE}" in
  application/json)
    TEST_PAYLOAD='{"inputs": "What is machine learning?", "parameters": {"max_new_tokens": 50}}'
    PAYLOAD_DESCRIPTION="Sample JSON inference request"
    ;;
  text/csv)
    TEST_PAYLOAD='1.0,2.0,3.0,4.0'
    PAYLOAD_DESCRIPTION="Sample CSV data"
    ;;
  *)
    # Any other content type falls back to the JSON sample body.
    TEST_PAYLOAD='{"inputs": "What is machine learning?", "parameters": {"max_new_tokens": 50}}'
    PAYLOAD_DESCRIPTION="Sample inference request"
    ;;
esac
echo " Content-Type: ${CONTENT_TYPE}"
echo " Payload: ${PAYLOAD_DESCRIPTION}"

echo " Invoking SageMaker endpoint..."

# All scratch files live in one private directory that the EXIT trap removes,
# so nothing is left behind on failure, on a `set -e` abort, or on Ctrl-C.
TEMP_DIR=$(mktemp -d)
trap 'rm -rf -- "${TEMP_DIR}"' EXIT
TEMP_PAYLOAD="${TEMP_DIR}/payload"      # request body sent to the endpoint
TEMP_RESPONSE="${TEMP_DIR}/response"    # response body written by the CLI
INVOKE_ERROR="${TEMP_DIR}/invoke-error" # stderr of the invoke call

printf '%s\n' "${TEST_PAYLOAD}" > "${TEMP_PAYLOAD}"

if ! aws sagemaker-runtime invoke-endpoint \
    --endpoint-name "${ENDPOINT_NAME}" \
    --region "${AWS_REGION}" \
    --content-type "${CONTENT_TYPE}" \
    --body "fileb://${TEMP_PAYLOAD}" \
    "${TEMP_RESPONSE}" 2>"${INVOKE_ERROR}"; then
  echo "❌ Inference request failed"
  echo " Error: $(cat "${INVOKE_ERROR}")"
  exit 1
fi

# Read response
RESPONSE_BODY=$(cat "${TEMP_RESPONSE}")

echo "✅ Inference request successful"
# Show at most the first 200 characters of the response.
echo " Response preview: ${RESPONSE_BODY:0:200}"
if [ "${#RESPONSE_BODY}" -gt 200 ]; then
  echo " (truncated, full response is ${#RESPONSE_BODY} characters)"
fi
160
+
161
+ echo ""
162
+ echo "✅ All tests passed!"
163
+ echo ""
164
+ echo "📋 What's next?"
165
+ <% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
166
+ echo " • Benchmark performance: ./do/benchmark"
167
+ <% } %>
168
+ echo " • Register this deployment: ./do/register"
169
+ echo " • View logs: ./do/logs"
170
+ echo " • Clean up resources: ./do/clean"
171
+
172
+ <% } else if (deploymentTarget === 'async-inference') { %>
# ============================================================
# Async Inference Testing
# ============================================================

echo "🧪 Testing marketplace async endpoint"
echo " Project: ${PROJECT_NAME}"
echo " Model package: ${MODEL_PACKAGE_ARN}"
echo " Region: ${AWS_REGION}"
echo " S3 output: ${ASYNC_S3_OUTPUT_PATH}"
echo ""

# Verify endpoint exists and is InService.
# The endpoint name falls back to the project name when not set explicitly.
ENDPOINT_NAME="${ENDPOINT_NAME:-${PROJECT_NAME}}"

echo "🔍 Test 1: Endpoint health check"
echo " Checking endpoint status..."

# stderr goes to its own file instead of being merged into the captured
# status: otherwise any warning the CLI prints on stderr would become part of
# ENDPOINT_STATUS and a healthy endpoint would fail the comparison below.
DESCRIBE_ERROR=$(mktemp)
if ! ENDPOINT_STATUS=$(aws sagemaker describe-endpoint \
    --endpoint-name "${ENDPOINT_NAME}" \
    --region "${AWS_REGION}" \
    --query 'EndpointStatus' \
    --output text 2>"${DESCRIBE_ERROR}"); then
  echo "❌ Endpoint not found or not accessible"
  echo " Error: $(cat "${DESCRIBE_ERROR}")"
  echo ""
  echo " Make sure you have deployed first: ./do/deploy"
  rm -f "${DESCRIBE_ERROR}"
  exit 1
fi
rm -f "${DESCRIBE_ERROR}"

if [ "${ENDPOINT_STATUS}" = "InService" ]; then
  echo "✅ Endpoint is InService"
else
  echo "❌ Endpoint is not InService (Status: ${ENDPOINT_STATUS})"
  exit 1
fi

echo ""

# Test 2: Async inference request
echo "🔍 Test 2: Async inference request"
213
+
214
+ # Determine content type and sample payload based on model package content types
215
+ <% if (typeof supportedContentTypes !== 'undefined' && supportedContentTypes && supportedContentTypes.length > 0) { %>
216
+ CONTENT_TYPE="<%= supportedContentTypes[0] %>"
217
+ <% } else { %>
218
+ CONTENT_TYPE="application/json"
219
+ <% } %>
220
+
# Create sample payload based on content type
case "${CONTENT_TYPE}" in
  application/json)
    TEST_PAYLOAD='{"inputs": "What is machine learning?", "parameters": {"max_new_tokens": 50}}'
    PAYLOAD_DESCRIPTION="Sample JSON inference request"
    ;;
  text/csv)
    TEST_PAYLOAD='1.0,2.0,3.0,4.0'
    PAYLOAD_DESCRIPTION="Sample CSV data"
    ;;
  *)
    # Any other content type falls back to the JSON sample body.
    TEST_PAYLOAD='{"inputs": "What is machine learning?", "parameters": {"max_new_tokens": 50}}'
    PAYLOAD_DESCRIPTION="Sample inference request"
    ;;
esac
echo " Content-Type: ${CONTENT_TYPE}"
echo " Payload: ${PAYLOAD_DESCRIPTION}"

echo " Uploading test payload to S3..."

# All scratch files live in one private directory that the EXIT trap removes,
# so nothing is left behind on failure, on a `set -e` abort, or on Ctrl-C.
TEMP_DIR=$(mktemp -d)
trap 'rm -rf -- "${TEMP_DIR}"' EXIT
TEMP_PAYLOAD="${TEMP_DIR}/payload"   # request body uploaded to S3
TEMP_RESULT="${TEMP_DIR}/result"     # downloaded inference result
AWS_ERROR="${TEMP_DIR}/aws-error"    # stderr of the most recent AWS call

printf '%s\n' "${TEST_PAYLOAD}" > "${TEMP_PAYLOAD}"

# Upload payload to S3 input location.
# The input object goes into the same bucket as the async output path: strip
# the s3:// scheme, then keep everything up to the first "/".
ASYNC_INPUT_KEY="${PROJECT_NAME}/input/test-payload-$(date +%s).json"
ASYNC_S3_BUCKET="${ASYNC_S3_OUTPUT_PATH#s3://}"
ASYNC_S3_BUCKET="${ASYNC_S3_BUCKET%%/*}"
S3_INPUT_LOCATION="s3://${ASYNC_S3_BUCKET}/${ASYNC_INPUT_KEY}"

if ! aws s3 cp "${TEMP_PAYLOAD}" "${S3_INPUT_LOCATION}" \
    --region "${AWS_REGION}" >/dev/null 2>"${AWS_ERROR}"; then
  echo "❌ Failed to upload test payload to S3"
  echo " Location: ${S3_INPUT_LOCATION}"
  echo " Error: $(cat "${AWS_ERROR}")"
  echo " Check that your IAM credentials have s3:PutObject permission"
  exit 1
fi
echo "✅ Test payload uploaded to: ${S3_INPUT_LOCATION}"

# Invoke endpoint asynchronously
echo " Invoking async endpoint..."

# --query/--output text make the CLI print just the two S3 locations,
# tab-separated, whatever output format the user's CLI profile defaults to.
INVOKE_ARGS=(
  --endpoint-name "${ENDPOINT_NAME}"
  --input-location "${S3_INPUT_LOCATION}"
  --region "${AWS_REGION}"
  --content-type "${CONTENT_TYPE}"
  --query '[OutputLocation, FailureLocation]'
  --output text
)

# stderr is kept apart from the captured output so CLI warnings cannot
# corrupt the values parsed below.
if ! INVOKE_RESULT=$(aws sagemaker-runtime invoke-endpoint-async \
    "${INVOKE_ARGS[@]}" 2>"${AWS_ERROR}"); then
  echo "❌ Async invocation failed"
  echo " Error: $(cat "${AWS_ERROR}")"
  exit 1
fi

# Extract output/failure locations from the response. The CLI prints the
# literal text "None" for a field that is absent from the response.
OUTPUT_LOCATION=""
FAILURE_LOCATION=""
IFS=$'\t' read -r OUTPUT_LOCATION FAILURE_LOCATION <<< "${INVOKE_RESULT}" || true
if [ "${OUTPUT_LOCATION}" = "None" ]; then
  OUTPUT_LOCATION=""
fi
if [ "${FAILURE_LOCATION}" = "None" ]; then
  FAILURE_LOCATION=""
fi

if [ -z "${OUTPUT_LOCATION}" ]; then
  echo "⚠️ Async invocation accepted but no output location returned"
  echo " Check the S3 output path for results: ${ASYNC_S3_OUTPUT_PATH}"
  echo ""
  echo "✅ Async invocation submitted successfully"
else
  echo "✅ Async invocation accepted"
  echo " Output location: ${OUTPUT_LOCATION}"

  # Poll S3 output location for result
  POLL_TIMEOUT=300   # seconds to wait in total
  POLL_INTERVAL=10   # seconds between checks
  ELAPSED=0
  RESULT_READY=false

  echo "⏳ Polling for async result (timeout: ${POLL_TIMEOUT}s)..."

  while [ "${ELAPSED}" -lt "${POLL_TIMEOUT}" ]; do
    if aws s3 ls "${OUTPUT_LOCATION}" --region "${AWS_REGION}" &> /dev/null; then
      RESULT_READY=true
      break
    fi

    # A failed inference never produces the output object; when the response
    # named a failure location, watch it too instead of waiting for the
    # timeout.
    if [ -n "${FAILURE_LOCATION}" ] \
        && aws s3 ls "${FAILURE_LOCATION}" --region "${AWS_REGION}" &> /dev/null; then
      echo "❌ Async inference failed"
      echo " Failure details: ${FAILURE_LOCATION}"
      echo " CloudWatch Logs: /aws/sagemaker/Endpoints/${ENDPOINT_NAME}"
      exit 1
    fi

    sleep "${POLL_INTERVAL}"
    ELAPSED=$((ELAPSED + POLL_INTERVAL))
    echo " ⏳ Waiting... (${ELAPSED}s / ${POLL_TIMEOUT}s)"
  done

  if [ "${RESULT_READY}" != "true" ]; then
    echo "❌ Async inference timed out after ${POLL_TIMEOUT}s"
    echo ""
    echo " The request may still be processing. Check:"
    echo " • S3 output path: ${OUTPUT_LOCATION}"
    echo " • CloudWatch Logs: /aws/sagemaker/Endpoints/${ENDPOINT_NAME}"
    echo " • Endpoint status: aws sagemaker describe-endpoint --endpoint-name ${ENDPOINT_NAME} --region ${AWS_REGION}"
    exit 1
  fi

  echo "✅ Async inference result available"

  # Download and display result (at most the first 200 characters).
  if aws s3 cp "${OUTPUT_LOCATION}" "${TEMP_RESULT}" --region "${AWS_REGION}" &> /dev/null; then
    RESPONSE_BODY=$(cat "${TEMP_RESULT}")

    echo " Response preview: ${RESPONSE_BODY:0:200}"
    if [ "${#RESPONSE_BODY}" -gt 200 ]; then
      echo " (truncated, full response is ${#RESPONSE_BODY} characters)"
    fi
  else
    echo "⚠️ Result exists but could not be downloaded"
  fi
fi
333
+
334
+ echo ""
335
+ echo "✅ All tests passed!"
336
+ echo ""
337
+ echo "📋 What's next?"
338
+ <% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
339
+ echo " • Benchmark performance: ./do/benchmark"
340
+ <% } %>
341
+ echo " • Check async output: aws s3 ls ${ASYNC_S3_OUTPUT_PATH}"
342
+ echo " • Register this deployment: ./do/register"
343
+ echo " • View logs: ./do/logs"
344
+ echo " • Clean up resources: ./do/clean"
345
+
346
+ <% } else if (deploymentTarget === 'batch-transform') { %>
# ============================================================
# Batch Transform Testing
# ============================================================

echo "🧪 Checking marketplace batch transform job status"
echo " Project: ${PROJECT_NAME}"
echo " Model package: ${MODEL_PACKAGE_ARN}"
echo " Region: ${AWS_REGION}"
echo " S3 input: ${BATCH_INPUT_PATH}"
echo " S3 output: ${BATCH_OUTPUT_PATH}"
echo ""

# Get transform job name from config (empty when it was never set).
TRANSFORM_JOB_NAME="${TRANSFORM_JOB_NAME:-}"
if [ -z "${TRANSFORM_JOB_NAME}" ]; then
  echo "❌ No transform job name found"
  echo " Run ./do/deploy first to create a transform job"
  exit 1
fi

echo "🔍 Checking transform job: ${TRANSFORM_JOB_NAME}"

#######################################
# Print one top-level field of the describe-transform-job response.
# Globals:   JOB_STATUS_JSON (read)
# Arguments: $1 - field name, e.g. TransformJobStatus
# Outputs:   the field value, or "Unknown" when the field is missing or the
#            JSON cannot be parsed
#######################################
job_field() {
  printf '%s\n' "${JOB_STATUS_JSON}" \
    | python3 -c 'import sys, json; print(json.load(sys.stdin).get(sys.argv[1], "Unknown"))' "$1" 2>/dev/null \
    || echo "Unknown"
}

# stderr is kept apart from the captured JSON so CLI warnings cannot break
# parsing, and --output json is forced so a different default output format
# in the user's CLI profile does not either.
DESCRIBE_ERROR=$(mktemp)
trap 'rm -f -- "${DESCRIBE_ERROR}"' EXIT
if ! JOB_STATUS_JSON=$(aws sagemaker describe-transform-job \
    --transform-job-name "${TRANSFORM_JOB_NAME}" \
    --region "${AWS_REGION}" \
    --output json 2>"${DESCRIBE_ERROR}"); then
  echo "❌ Failed to describe transform job"
  echo " Error: $(cat "${DESCRIBE_ERROR}")"
  exit 1
fi

JOB_STATUS=$(job_field TransformJobStatus)

case "${JOB_STATUS}" in
  Completed)
    echo "✅ Transform job completed successfully"
    echo ""

    # Download results locally
    LOCAL_OUTPUT_DIR="${SCRIPT_DIR}/../batch-output"
    mkdir -p "${LOCAL_OUTPUT_DIR}"
    echo "📥 Downloading results to ${LOCAL_OUTPUT_DIR}/"
    if aws s3 sync "${BATCH_OUTPUT_PATH}" "${LOCAL_OUTPUT_DIR}/" --region "${AWS_REGION}"; then
      # Count regular files recursively: sync mirrors the S3 key layout, so
      # results may sit in sub-directories.
      DOWNLOADED=$(find "${LOCAL_OUTPUT_DIR}" -type f | wc -l | tr -d ' ')
      echo "✅ Downloaded ${DOWNLOADED} file(s) to ${LOCAL_OUTPUT_DIR}/"
      echo ""

      # Display first output file preview. `sed -n 1p` reads its whole input,
      # so the upstream stages are never cut off early under pipefail.
      FIRST_FILE=$(find "${LOCAL_OUTPUT_DIR}" -type f | LC_ALL=C sort | sed -n '1p')
      if [ -n "${FIRST_FILE}" ]; then
        FIRST_FILE_LABEL="${FIRST_FILE#"${LOCAL_OUTPUT_DIR}/"}"
        echo "📄 Sample output (${FIRST_FILE_LABEL}):"
        head -n 5 "${FIRST_FILE}"
        # Not named LINES: bash itself maintains a variable of that name.
        LINE_COUNT=$(wc -l < "${FIRST_FILE}" | tr -d ' ')
        if [ "${LINE_COUNT}" -gt 5 ]; then
          echo " ... (${LINE_COUNT} total lines)"
        fi
      fi
    else
      echo "⚠️ Could not download output files"
    fi

    echo ""
    echo "✅ All tests passed!"
    echo ""
    echo "📋 What's next?"
    # batch-output is a directory, so the hint lists it rather than cat-ing it.
    echo " • View results: ls batch-output/"
    echo " • Register this deployment: ./do/register"
    echo " • View logs: ./do/logs"
    echo " • Clean up resources: ./do/clean"
    ;;
  InProgress)
    echo "⏳ Transform job is still in progress"

    # Extract progress details
    CREATION_TIME=$(job_field CreationTime)
    echo " Started: ${CREATION_TIME}"
    echo " Status: InProgress"
    echo ""
    echo " The job is still running. Check again later:"
    echo " ./do/test"
    echo ""
    echo " View logs:"
    echo " ./do/logs"
    ;;
  Failed)
    echo "❌ Transform job failed"

    FAILURE_REASON=$(job_field FailureReason)
    echo " Reason: ${FAILURE_REASON}"
    echo ""
    echo " View logs for more details:"
    echo " ./do/logs"
    exit 1
    ;;
  Stopped)
    echo "⚠️ Transform job was stopped"
    echo " The job was manually stopped before completion"
    echo ""
    echo " To start a new job, run:"
    echo " ./do/deploy"
    ;;
  *)
    # Any other status, including "Unknown" when parsing failed.
    echo "⚠️ Transform job status: ${JOB_STATUS}"
    echo " Check again later: ./do/test"
    ;;
esac
452
+
453
+ <% } %>