@aws/ml-container-creator 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +202 -0
- package/LICENSE-THIRD-PARTY +68620 -0
- package/NOTICE +2 -0
- package/README.md +106 -0
- package/bin/cli.js +365 -0
- package/config/defaults.json +32 -0
- package/config/presets/transformers-djl.json +26 -0
- package/config/presets/transformers-gpu.json +24 -0
- package/config/presets/transformers-lmi.json +27 -0
- package/package.json +129 -0
- package/servers/README.md +419 -0
- package/servers/base-image-picker/catalogs/model-servers.json +1191 -0
- package/servers/base-image-picker/catalogs/python-slim.json +38 -0
- package/servers/base-image-picker/catalogs/triton-backends.json +51 -0
- package/servers/base-image-picker/catalogs/triton.json +38 -0
- package/servers/base-image-picker/index.js +495 -0
- package/servers/base-image-picker/manifest.json +17 -0
- package/servers/base-image-picker/package.json +15 -0
- package/servers/hyperpod-cluster-picker/LICENSE +202 -0
- package/servers/hyperpod-cluster-picker/index.js +424 -0
- package/servers/hyperpod-cluster-picker/manifest.json +14 -0
- package/servers/hyperpod-cluster-picker/package.json +17 -0
- package/servers/instance-recommender/LICENSE +202 -0
- package/servers/instance-recommender/catalogs/instances.json +852 -0
- package/servers/instance-recommender/index.js +284 -0
- package/servers/instance-recommender/manifest.json +16 -0
- package/servers/instance-recommender/package.json +15 -0
- package/servers/lib/LICENSE +202 -0
- package/servers/lib/bedrock-client.js +160 -0
- package/servers/lib/custom-validators.js +46 -0
- package/servers/lib/dynamic-resolver.js +36 -0
- package/servers/lib/package.json +11 -0
- package/servers/lib/schemas/image-catalog.schema.json +185 -0
- package/servers/lib/schemas/instances.schema.json +124 -0
- package/servers/lib/schemas/manifest.schema.json +64 -0
- package/servers/lib/schemas/model-catalog.schema.json +91 -0
- package/servers/lib/schemas/regions.schema.json +26 -0
- package/servers/lib/schemas/triton-backends.schema.json +51 -0
- package/servers/model-picker/catalogs/jumpstart-public.json +66 -0
- package/servers/model-picker/catalogs/popular-diffusors.json +88 -0
- package/servers/model-picker/catalogs/popular-transformers.json +226 -0
- package/servers/model-picker/index.js +1693 -0
- package/servers/model-picker/manifest.json +18 -0
- package/servers/model-picker/package.json +20 -0
- package/servers/region-picker/LICENSE +202 -0
- package/servers/region-picker/catalogs/regions.json +263 -0
- package/servers/region-picker/index.js +230 -0
- package/servers/region-picker/manifest.json +16 -0
- package/servers/region-picker/package.json +15 -0
- package/src/app.js +1007 -0
- package/src/copy-tpl.js +77 -0
- package/src/lib/accelerator-validator.js +39 -0
- package/src/lib/asset-manager.js +385 -0
- package/src/lib/aws-profile-parser.js +181 -0
- package/src/lib/bootstrap-command-handler.js +1647 -0
- package/src/lib/bootstrap-config.js +238 -0
- package/src/lib/ci-register-helpers.js +124 -0
- package/src/lib/ci-report-helpers.js +158 -0
- package/src/lib/ci-stage-helpers.js +268 -0
- package/src/lib/cli-handler.js +529 -0
- package/src/lib/comment-generator.js +544 -0
- package/src/lib/community-reports-validator.js +91 -0
- package/src/lib/config-manager.js +2106 -0
- package/src/lib/configuration-exporter.js +204 -0
- package/src/lib/configuration-manager.js +695 -0
- package/src/lib/configuration-matcher.js +221 -0
- package/src/lib/cpu-validator.js +36 -0
- package/src/lib/cuda-validator.js +57 -0
- package/src/lib/deployment-config-resolver.js +103 -0
- package/src/lib/deployment-entry-schema.js +125 -0
- package/src/lib/deployment-registry.js +598 -0
- package/src/lib/docker-introspection-validator.js +51 -0
- package/src/lib/engine-prefix-resolver.js +60 -0
- package/src/lib/huggingface-client.js +172 -0
- package/src/lib/key-value-parser.js +37 -0
- package/src/lib/known-flags-validator.js +200 -0
- package/src/lib/manifest-cli.js +280 -0
- package/src/lib/mcp-client.js +303 -0
- package/src/lib/mcp-command-handler.js +532 -0
- package/src/lib/neuron-validator.js +80 -0
- package/src/lib/parameter-schema-validator.js +284 -0
- package/src/lib/prompt-runner.js +1349 -0
- package/src/lib/prompts.js +1138 -0
- package/src/lib/registry-command-handler.js +519 -0
- package/src/lib/registry-loader.js +198 -0
- package/src/lib/rocm-validator.js +80 -0
- package/src/lib/schema-validator.js +157 -0
- package/src/lib/sensitive-redactor.js +59 -0
- package/src/lib/template-engine.js +156 -0
- package/src/lib/template-manager.js +341 -0
- package/src/lib/validation-engine.js +314 -0
- package/src/prompt-adapter.js +63 -0
- package/templates/Dockerfile +300 -0
- package/templates/IAM_PERMISSIONS.md +84 -0
- package/templates/MIGRATION.md +488 -0
- package/templates/PROJECT_README.md +439 -0
- package/templates/TEMPLATE_SYSTEM.md +243 -0
- package/templates/buildspec.yml +64 -0
- package/templates/code/chat_template.jinja +1 -0
- package/templates/code/flask/gunicorn_config.py +35 -0
- package/templates/code/flask/wsgi.py +10 -0
- package/templates/code/model_handler.py +387 -0
- package/templates/code/serve +300 -0
- package/templates/code/serve.py +175 -0
- package/templates/code/serving.properties +105 -0
- package/templates/code/start_server.py +39 -0
- package/templates/code/start_server.sh +39 -0
- package/templates/diffusors/Dockerfile +72 -0
- package/templates/diffusors/patch_image_api.py +35 -0
- package/templates/diffusors/serve +115 -0
- package/templates/diffusors/start_server.sh +114 -0
- package/templates/do/.gitkeep +1 -0
- package/templates/do/README.md +541 -0
- package/templates/do/build +83 -0
- package/templates/do/ci +681 -0
- package/templates/do/clean +811 -0
- package/templates/do/config +260 -0
- package/templates/do/deploy +1560 -0
- package/templates/do/export +306 -0
- package/templates/do/logs +319 -0
- package/templates/do/manifest +12 -0
- package/templates/do/push +119 -0
- package/templates/do/register +580 -0
- package/templates/do/run +113 -0
- package/templates/do/submit +417 -0
- package/templates/do/test +1147 -0
- package/templates/hyperpod/configmap.yaml +24 -0
- package/templates/hyperpod/deployment.yaml +71 -0
- package/templates/hyperpod/pvc.yaml +42 -0
- package/templates/hyperpod/service.yaml +17 -0
- package/templates/nginx-diffusors.conf +74 -0
- package/templates/nginx-predictors.conf +47 -0
- package/templates/nginx-tensorrt.conf +74 -0
- package/templates/requirements.txt +61 -0
- package/templates/sample_model/test_inference.py +123 -0
- package/templates/sample_model/train_abalone.py +252 -0
- package/templates/test/test_endpoint.sh +79 -0
- package/templates/test/test_local_image.sh +80 -0
- package/templates/test/test_model_handler.py +180 -0
- package/templates/triton/Dockerfile +128 -0
- package/templates/triton/config.pbtxt +163 -0
- package/templates/triton/model.py +130 -0
- package/templates/triton/requirements.txt +11 -0
|
@@ -0,0 +1,1147 @@
|
|
|
1
|
+
#!/bin/bash
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

# Fail fast: abort on command errors, unset variables, and failures anywhere
# in a pipeline.
set -euo pipefail

# Source configuration
# Resolve this script's own directory so ./do/test works from any CWD, then
# load project settings (PROJECT_NAME, FRAMEWORK, MODEL_SERVER, AWS_REGION, ...).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/config"
|
13
|
+
<% if (deploymentTarget === 'managed-inference') { %>
|
|
14
|
+
# ============================================================
|
|
15
|
+
# SageMaker Managed Inference Testing
|
|
16
|
+
# ============================================================
|
|
17
|
+
|
|
18
|
+
# Parse arguments
# The endpoint name comes from the first positional argument, falling back to
# the ENDPOINT_NAME environment variable; empty selects local-container mode.
ENDPOINT_NAME="${1:-${ENDPOINT_NAME:-}}"

if [ -n "${ENDPOINT_NAME}" ]; then
  # An endpoint was named: exercise the deployed SageMaker endpoint.
  echo "🧪 Testing SageMaker endpoint: ${ENDPOINT_NAME}"
  echo "   Project: ${PROJECT_NAME}"
  echo "   Framework: ${FRAMEWORK}"
  echo "   Model server: ${MODEL_SERVER}"
  echo "   Region: ${AWS_REGION}"
  TEST_MODE="sagemaker"
else
  # No endpoint given: target the container started by ./do/run.
  echo "🧪 Testing local container at localhost:8080"
  echo "   Project: ${PROJECT_NAME}"
  echo "   Framework: ${FRAMEWORK}"
  echo "   Model server: ${MODEL_SERVER}"
  TARGET_URL="http://localhost:8080"
  TEST_MODE="local"
fi

echo ""
|
|
38
|
+
|
|
39
|
+
# Test 1: Health check (/ping)
# Locally this probes the container's /ping route. A deployed SageMaker
# endpoint does not expose /ping directly, so endpoint status stands in.
echo "🔍 Test 1: Health check"
if [ "${TEST_MODE}" = "local" ]; then
  echo "   Sending GET request to ${TARGET_URL}/ping"

  # -m 10: a wedged container must fail the health check, not hang the test
  # run forever (the diffusors inference curl below already bounds its time).
  if ! PING_RESPONSE=$(curl -s -m 10 -w "\n%{http_code}" -X GET "${TARGET_URL}/ping" 2>&1); then
    echo "❌ Health check failed: Could not connect to local container"
    echo "   Make sure the container is running: ./do/run"
    exit 1
  fi

  # curl -w appended the status code on its own final line: split it off.
  HTTP_CODE=$(echo "${PING_RESPONSE}" | tail -n1)
  RESPONSE_BODY=$(echo "${PING_RESPONSE}" | sed '$d')

  if [ "${HTTP_CODE}" = "200" ]; then
    echo "✅ Health check passed (HTTP ${HTTP_CODE})"
  else
    echo "❌ Health check failed (HTTP ${HTTP_CODE})"
    echo "   Response: ${RESPONSE_BODY}"
    exit 1
  fi
else
  # For SageMaker endpoints, /ping is not directly accessible
  # We'll verify the endpoint exists and is InService
  echo "   Checking endpoint status..."

  if ! ENDPOINT_STATUS=$(aws sagemaker describe-endpoint \
    --endpoint-name "${ENDPOINT_NAME}" \
    --region "${AWS_REGION}" \
    --query 'EndpointStatus' \
    --output text 2>&1); then
    echo "❌ Endpoint not found or not accessible"
    echo "   Error: ${ENDPOINT_STATUS}"
    exit 1
  fi

  if [ "${ENDPOINT_STATUS}" = "InService" ]; then
    echo "✅ Endpoint is InService"
  else
    echo "❌ Endpoint is not InService (Status: ${ENDPOINT_STATUS})"
    exit 1
  fi
fi

echo ""
|
|
84
|
+
|
|
85
|
+
# Test 2: Inference request (/invocations)
echo "🔍 Test 2: Inference request"

# Create framework-specific test payload
case "${FRAMEWORK}" in
  sklearn|xgboost|tensorflow)
    # Classic predictors and TensorFlow all accept the same JSON
    # "instances" shape, so they share one arm.
    TEST_PAYLOAD='{"instances": [[1.0, 2.0, 3.0, 4.0]]}'
    echo "   Payload: Sample feature vector"
    ;;
  transformers)
    # Transformers: payload format depends on model server
    case "${MODEL_SERVER}" in
      vllm|sglang)
        # OpenAI-compatible chat completions format
        # For S3/JumpStart models, vLLM registers the model under the local path
        VLLM_MODEL_NAME="${MODEL_NAME}"
        case "${MODEL_NAME}" in
          jumpstart://*|jumpstart-hub://*|s3://*|/opt/ml/*)
            VLLM_MODEL_NAME="/opt/ml/model"
            ;;
        esac
        TEST_PAYLOAD='{"model": "'"${VLLM_MODEL_NAME}"'", "messages": [{"role": "user", "content": "What is machine learning?"}], "max_tokens": 50, "temperature": 0.7}'
        echo "   Payload: OpenAI-compatible chat completion request"
        echo "   Model: ${VLLM_MODEL_NAME}"
        ;;
      *)
        # HuggingFace-style format for LMI, DJL, TensorRT-LLM
        TEST_PAYLOAD='{"inputs": "What is machine learning?", "parameters": {"max_new_tokens": 50, "temperature": 0.7}}'
        echo "   Payload: HuggingFace-style text generation request"
        ;;
    esac
    ;;
  diffusors)
    # Diffusors: OpenAI DALL-E compatible image generation request
    TEST_PAYLOAD='{"prompt": "A white cat", "n": 1, "size": "512x512"}'
    echo "   Payload: OpenAI DALL-E compatible image generation request"
    ;;
  *)
    echo "❌ Unknown framework: ${FRAMEWORK}"
    exit 3
    ;;
esac
|
|
131
|
+
|
|
132
|
+
if [ "${TEST_MODE}" = "local" ]; then
  if [ "${FRAMEWORK}" = "diffusors" ]; then
    echo "   Sending POST request to ${TARGET_URL}/v1/images/generations"

    if ! INVOKE_RESPONSE=$(curl -s -m 120 -w "\n%{http_code}" -X POST "${TARGET_URL}/v1/images/generations" \
      -H "Content-Type: application/json" \
      -d "${TEST_PAYLOAD}" 2>&1); then
      echo "❌ Inference request failed: Could not connect to local container"
      exit 1
    fi

    # curl -w appended the status code on its own final line: split it off.
    HTTP_CODE=$(echo "${INVOKE_RESPONSE}" | tail -n1)
    RESPONSE_BODY=$(echo "${INVOKE_RESPONSE}" | sed '$d')

    if [ "${HTTP_CODE}" = "200" ]; then
      # Validate response contains data array with b64_json
      if echo "${RESPONSE_BODY}" | python3 -c "
import sys, json
resp = json.load(sys.stdin)
assert 'data' in resp, 'Missing data array'
assert len(resp['data']) > 0, 'Empty data array'
assert 'b64_json' in resp['data'][0], 'Missing b64_json in data entry'
" 2>/dev/null; then
        echo "✅ Image generation successful (HTTP ${HTTP_CODE})"
        echo "   Response contains valid data array with b64_json image"

        # Save generated image to file
        # NOTE(review): OUTPUT_IMAGE is interpolated into Python source below, so
        # a path containing a single quote would break the snippet — presumably
        # acceptable since SCRIPT_DIR is project-controlled; confirm.
        OUTPUT_IMAGE="${SCRIPT_DIR}/../test_output.png"
        if echo "${RESPONSE_BODY}" | python3 -c "
import sys, json, base64
resp = json.load(sys.stdin)
img_data = base64.b64decode(resp['data'][0]['b64_json'])
with open('${OUTPUT_IMAGE}', 'wb') as f:
    f.write(img_data)
" 2>/dev/null; then
          echo "   🖼️ Image saved to: ${OUTPUT_IMAGE}"
        fi
      else
        echo "⚠️ Image generation returned HTTP ${HTTP_CODE} but response format unexpected"
        echo "   Response preview: ${RESPONSE_BODY:0:200}"
      fi
    else
      echo "❌ Image generation failed (HTTP ${HTTP_CODE})"
      echo "   Response: ${RESPONSE_BODY}"
      exit 1
    fi
  else
    echo "   Sending POST request to ${TARGET_URL}/invocations"

    # -m 120 matches the diffusors path above: generation can legitimately be
    # slow, but a hung container should still fail instead of blocking forever.
    if ! INVOKE_RESPONSE=$(curl -s -m 120 -w "\n%{http_code}" -X POST "${TARGET_URL}/invocations" \
      -H "Content-Type: application/json" \
      -d "${TEST_PAYLOAD}" 2>&1); then
      echo "❌ Inference request failed: Could not connect to local container"
      exit 1
    fi

    HTTP_CODE=$(echo "${INVOKE_RESPONSE}" | tail -n1)
    RESPONSE_BODY=$(echo "${INVOKE_RESPONSE}" | sed '$d')

    if [ "${HTTP_CODE}" = "200" ]; then
      echo "✅ Inference request successful (HTTP ${HTTP_CODE})"
      echo "   Response preview: ${RESPONSE_BODY:0:200}"
      if [ ${#RESPONSE_BODY} -gt 200 ]; then
        echo "   (truncated, full response is ${#RESPONSE_BODY} characters)"
      fi
    else
      echo "❌ Inference request failed (HTTP ${HTTP_CODE})"
      echo "   Response: ${RESPONSE_BODY}"
      exit 1
    fi
  fi
else
  echo "   Invoking SageMaker endpoint..."

  # Create temporary file for payload
  TEMP_PAYLOAD=$(mktemp)
  echo "${TEST_PAYLOAD}" > "${TEMP_PAYLOAD}"

  # Create temporary file for response
  TEMP_RESPONSE=$(mktemp)

  # Invoke endpoint via inference component
  IC_NAME="${INFERENCE_COMPONENT_NAME:-}"
  INVOKE_ARGS=(
    --endpoint-name "${ENDPOINT_NAME}"
    --region "${AWS_REGION}"
    --content-type "application/json"
    --body "fileb://${TEMP_PAYLOAD}"
  )
  if [ -n "${IC_NAME}" ]; then
    INVOKE_ARGS+=(--inference-component-name "${IC_NAME}")
    echo "   Inference component: ${IC_NAME}"
  fi

  # Capture stderr separately so the AWS CLI error can be surfaced verbatim.
  INVOKE_ERROR=$(mktemp)
  if ! aws sagemaker-runtime invoke-endpoint \
    "${INVOKE_ARGS[@]}" \
    "${TEMP_RESPONSE}" 2>"${INVOKE_ERROR}"; then
    echo "❌ Inference request failed"
    echo "   Error: $(cat "${INVOKE_ERROR}")"
    rm -f "${TEMP_PAYLOAD}" "${TEMP_RESPONSE}" "${INVOKE_ERROR}"
    exit 1
  fi
  rm -f "${INVOKE_ERROR}"

  # Read response
  RESPONSE_BODY=$(cat "${TEMP_RESPONSE}")

  # Clean up temp files
  rm -f "${TEMP_PAYLOAD}" "${TEMP_RESPONSE}"

  if [ "${FRAMEWORK}" = "diffusors" ]; then
    # Validate response contains data array with b64_json
    if echo "${RESPONSE_BODY}" | python3 -c "
import sys, json
resp = json.load(sys.stdin)
assert 'data' in resp, 'Missing data array'
assert len(resp['data']) > 0, 'Empty data array'
assert 'b64_json' in resp['data'][0], 'Missing b64_json in data entry'
" 2>/dev/null; then
      echo "✅ Image generation successful"
      echo "   Response contains valid data array with b64_json image"

      # Save generated image to file
      OUTPUT_IMAGE="${SCRIPT_DIR}/../test_output.png"
      if echo "${RESPONSE_BODY}" | python3 -c "
import sys, json, base64
resp = json.load(sys.stdin)
img_data = base64.b64decode(resp['data'][0]['b64_json'])
with open('${OUTPUT_IMAGE}', 'wb') as f:
    f.write(img_data)
" 2>/dev/null; then
        echo "   🖼️ Image saved to: ${OUTPUT_IMAGE}"
      fi
    else
      echo "⚠️ Image generation returned but response format unexpected"
      echo "   Response preview: ${RESPONSE_BODY:0:200}"
    fi
  else
    echo "✅ Inference request successful"
    echo "   Response preview: ${RESPONSE_BODY:0:200}"
    if [ ${#RESPONSE_BODY} -gt 200 ]; then
      echo "   (truncated, full response is ${#RESPONSE_BODY} characters)"
    fi
  fi
fi
|
|
278
|
+
|
|
279
|
+
echo ""
echo "✅ All tests passed!"
echo ""

# Print mode-appropriate follow-up instructions.
if [ "${TEST_MODE}" != "local" ]; then
  echo "Endpoint is ready for production use!"
  echo "   • Endpoint name: ${ENDPOINT_NAME}"
  echo "   • Region: ${AWS_REGION}"
  echo ""
  echo "📝 Register this deployment:"
  echo "   ./do/register"
else
  echo "Next steps:"
  echo "   • Push to ECR: ./do/push"
  echo "   • Deploy to SageMaker: ./do/deploy"
fi

|
296
|
+
<% } else if (deploymentTarget === 'async-inference') { %>
|
|
297
|
+
# ============================================================
|
|
298
|
+
# SageMaker Managed Inference - Async Testing
|
|
299
|
+
# ============================================================
|
|
300
|
+
|
|
301
|
+
# Parse arguments
|
|
302
|
+
ENDPOINT_NAME="${1:-${ENDPOINT_NAME:-}}"
|
|
303
|
+
|
|
304
|
+
if [ -z "${ENDPOINT_NAME}" ]; then
|
|
305
|
+
echo "🧪 Testing local container at localhost:8080"
|
|
306
|
+
echo " Project: ${PROJECT_NAME}"
|
|
307
|
+
echo " Framework: ${FRAMEWORK}"
|
|
308
|
+
echo " Model server: ${MODEL_SERVER}"
|
|
309
|
+
TARGET_URL="http://localhost:8080"
|
|
310
|
+
TEST_MODE="local"
|
|
311
|
+
else
|
|
312
|
+
echo "🧪 Testing SageMaker async endpoint: ${ENDPOINT_NAME}"
|
|
313
|
+
echo " Project: ${PROJECT_NAME}"
|
|
314
|
+
echo " Framework: ${FRAMEWORK}"
|
|
315
|
+
echo " Model server: ${MODEL_SERVER}"
|
|
316
|
+
echo " Region: ${AWS_REGION}"
|
|
317
|
+
echo " S3 output: ${ASYNC_S3_OUTPUT_PATH}"
|
|
318
|
+
TEST_MODE="sagemaker"
|
|
319
|
+
fi
|
|
320
|
+
|
|
321
|
+
echo ""
|
|
322
|
+
|
|
323
|
+
# Test 1: Health check (/ping)
|
|
324
|
+
echo "🔍 Test 1: Health check"
|
|
325
|
+
if [ "${TEST_MODE}" = "local" ]; then
|
|
326
|
+
echo " Sending GET request to ${TARGET_URL}/ping"
|
|
327
|
+
|
|
328
|
+
if ! PING_RESPONSE=$(curl -s -w "\n%{http_code}" -X GET "${TARGET_URL}/ping" 2>&1); then
|
|
329
|
+
echo "❌ Health check failed: Could not connect to local container"
|
|
330
|
+
echo " Make sure the container is running: ./do/run"
|
|
331
|
+
exit 1
|
|
332
|
+
fi
|
|
333
|
+
|
|
334
|
+
HTTP_CODE=$(echo "${PING_RESPONSE}" | tail -n1)
|
|
335
|
+
RESPONSE_BODY=$(echo "${PING_RESPONSE}" | sed '$d')
|
|
336
|
+
|
|
337
|
+
if [ "${HTTP_CODE}" = "200" ]; then
|
|
338
|
+
echo "✅ Health check passed (HTTP ${HTTP_CODE})"
|
|
339
|
+
else
|
|
340
|
+
echo "❌ Health check failed (HTTP ${HTTP_CODE})"
|
|
341
|
+
echo " Response: ${RESPONSE_BODY}"
|
|
342
|
+
exit 1
|
|
343
|
+
fi
|
|
344
|
+
else
|
|
345
|
+
# For SageMaker endpoints, verify the endpoint exists and is InService
|
|
346
|
+
echo " Checking endpoint status..."
|
|
347
|
+
|
|
348
|
+
if ! ENDPOINT_STATUS=$(aws sagemaker describe-endpoint \
|
|
349
|
+
--endpoint-name "${ENDPOINT_NAME}" \
|
|
350
|
+
--region "${AWS_REGION}" \
|
|
351
|
+
--query 'EndpointStatus' \
|
|
352
|
+
--output text 2>&1); then
|
|
353
|
+
echo "❌ Endpoint not found or not accessible"
|
|
354
|
+
echo " Error: ${ENDPOINT_STATUS}"
|
|
355
|
+
exit 1
|
|
356
|
+
fi
|
|
357
|
+
|
|
358
|
+
if [ "${ENDPOINT_STATUS}" = "InService" ]; then
|
|
359
|
+
echo "✅ Endpoint is InService"
|
|
360
|
+
else
|
|
361
|
+
echo "❌ Endpoint is not InService (Status: ${ENDPOINT_STATUS})"
|
|
362
|
+
exit 1
|
|
363
|
+
fi
|
|
364
|
+
fi
|
|
365
|
+
|
|
366
|
+
echo ""
|
|
367
|
+
|
|
368
|
+
# Test 2: Inference request
|
|
369
|
+
echo "🔍 Test 2: Inference request"
|
|
370
|
+
|
|
371
|
+
# Create framework-specific test payload
|
|
372
|
+
case "${FRAMEWORK}" in
|
|
373
|
+
sklearn|xgboost)
|
|
374
|
+
TEST_PAYLOAD='{"instances": [[1.0, 2.0, 3.0, 4.0]]}'
|
|
375
|
+
echo " Payload: Sample feature vector"
|
|
376
|
+
;;
|
|
377
|
+
tensorflow)
|
|
378
|
+
TEST_PAYLOAD='{"instances": [[1.0, 2.0, 3.0, 4.0]]}'
|
|
379
|
+
echo " Payload: Sample feature vector"
|
|
380
|
+
;;
|
|
381
|
+
transformers)
|
|
382
|
+
case "${MODEL_SERVER}" in
|
|
383
|
+
vllm|sglang)
|
|
384
|
+
VLLM_MODEL_NAME="${MODEL_NAME}"
|
|
385
|
+
if [[ "${MODEL_NAME}" == jumpstart://* ]] || [[ "${MODEL_NAME}" == jumpstart-hub://* ]] || [[ "${MODEL_NAME}" == s3://* ]] || [[ "${MODEL_NAME}" == /opt/ml/* ]]; then
|
|
386
|
+
VLLM_MODEL_NAME="/opt/ml/model"
|
|
387
|
+
fi
|
|
388
|
+
TEST_PAYLOAD='{"model": "'"${VLLM_MODEL_NAME}"'", "messages": [{"role": "user", "content": "What is machine learning?"}], "max_tokens": 50, "temperature": 0.7}'
|
|
389
|
+
echo " Payload: OpenAI-compatible chat completion request"
|
|
390
|
+
echo " Model: ${VLLM_MODEL_NAME}"
|
|
391
|
+
;;
|
|
392
|
+
*)
|
|
393
|
+
TEST_PAYLOAD='{"inputs": "What is machine learning?", "parameters": {"max_new_tokens": 50, "temperature": 0.7}}'
|
|
394
|
+
echo " Payload: HuggingFace-style text generation request"
|
|
395
|
+
;;
|
|
396
|
+
esac
|
|
397
|
+
;;
|
|
398
|
+
diffusors)
|
|
399
|
+
TEST_PAYLOAD='{"prompt": "A white cat", "n": 1, "size": "512x512"}'
|
|
400
|
+
echo " Payload: OpenAI DALL-E compatible image generation request"
|
|
401
|
+
;;
|
|
402
|
+
*)
|
|
403
|
+
echo "❌ Unknown framework: ${FRAMEWORK}"
|
|
404
|
+
exit 3
|
|
405
|
+
;;
|
|
406
|
+
esac
|
|
407
|
+
|
|
408
|
+
if [ "${TEST_MODE}" = "local" ]; then
|
|
409
|
+
if [ "${FRAMEWORK}" = "diffusors" ]; then
|
|
410
|
+
echo " Sending POST request to ${TARGET_URL}/v1/images/generations"
|
|
411
|
+
|
|
412
|
+
if ! INVOKE_RESPONSE=$(curl -s -m 120 -w "\n%{http_code}" -X POST "${TARGET_URL}/v1/images/generations" \
|
|
413
|
+
-H "Content-Type: application/json" \
|
|
414
|
+
-d "${TEST_PAYLOAD}" 2>&1); then
|
|
415
|
+
echo "❌ Inference request failed: Could not connect to local container"
|
|
416
|
+
exit 1
|
|
417
|
+
fi
|
|
418
|
+
|
|
419
|
+
HTTP_CODE=$(echo "${INVOKE_RESPONSE}" | tail -n1)
|
|
420
|
+
RESPONSE_BODY=$(echo "${INVOKE_RESPONSE}" | sed '$d')
|
|
421
|
+
|
|
422
|
+
if [ "${HTTP_CODE}" = "200" ]; then
|
|
423
|
+
if echo "${RESPONSE_BODY}" | python3 -c "
|
|
424
|
+
import sys, json
|
|
425
|
+
resp = json.load(sys.stdin)
|
|
426
|
+
assert 'data' in resp, 'Missing data array'
|
|
427
|
+
assert len(resp['data']) > 0, 'Empty data array'
|
|
428
|
+
assert 'b64_json' in resp['data'][0], 'Missing b64_json in data entry'
|
|
429
|
+
" 2>/dev/null; then
|
|
430
|
+
echo "✅ Image generation successful (HTTP ${HTTP_CODE})"
|
|
431
|
+
echo " Response contains valid data array with b64_json image"
|
|
432
|
+
|
|
433
|
+
# Save generated image to file
|
|
434
|
+
OUTPUT_IMAGE="${SCRIPT_DIR}/../test_output.png"
|
|
435
|
+
if echo "${RESPONSE_BODY}" | python3 -c "
|
|
436
|
+
import sys, json, base64
|
|
437
|
+
resp = json.load(sys.stdin)
|
|
438
|
+
img_data = base64.b64decode(resp['data'][0]['b64_json'])
|
|
439
|
+
with open('${OUTPUT_IMAGE}', 'wb') as f:
|
|
440
|
+
f.write(img_data)
|
|
441
|
+
" 2>/dev/null; then
|
|
442
|
+
echo " 🖼️ Image saved to: ${OUTPUT_IMAGE}"
|
|
443
|
+
fi
|
|
444
|
+
else
|
|
445
|
+
echo "⚠️ Image generation returned HTTP ${HTTP_CODE} but response format unexpected"
|
|
446
|
+
echo " Response preview: ${RESPONSE_BODY:0:200}"
|
|
447
|
+
fi
|
|
448
|
+
else
|
|
449
|
+
echo "❌ Image generation failed (HTTP ${HTTP_CODE})"
|
|
450
|
+
echo " Response: ${RESPONSE_BODY}"
|
|
451
|
+
exit 1
|
|
452
|
+
fi
|
|
453
|
+
else
|
|
454
|
+
echo " Sending POST request to ${TARGET_URL}/invocations"
|
|
455
|
+
|
|
456
|
+
if ! INVOKE_RESPONSE=$(curl -s -w "\n%{http_code}" -X POST "${TARGET_URL}/invocations" \
|
|
457
|
+
-H "Content-Type: application/json" \
|
|
458
|
+
-d "${TEST_PAYLOAD}" 2>&1); then
|
|
459
|
+
echo "❌ Inference request failed: Could not connect to local container"
|
|
460
|
+
exit 1
|
|
461
|
+
fi
|
|
462
|
+
|
|
463
|
+
HTTP_CODE=$(echo "${INVOKE_RESPONSE}" | tail -n1)
|
|
464
|
+
RESPONSE_BODY=$(echo "${INVOKE_RESPONSE}" | sed '$d')
|
|
465
|
+
|
|
466
|
+
if [ "${HTTP_CODE}" = "200" ]; then
|
|
467
|
+
echo "✅ Inference request successful (HTTP ${HTTP_CODE})"
|
|
468
|
+
echo " Response preview: ${RESPONSE_BODY:0:200}"
|
|
469
|
+
if [ ${#RESPONSE_BODY} -gt 200 ]; then
|
|
470
|
+
echo " (truncated, full response is ${#RESPONSE_BODY} characters)"
|
|
471
|
+
fi
|
|
472
|
+
else
|
|
473
|
+
echo "❌ Inference request failed (HTTP ${HTTP_CODE})"
|
|
474
|
+
echo " Response: ${RESPONSE_BODY}"
|
|
475
|
+
exit 1
|
|
476
|
+
fi
|
|
477
|
+
fi
|
|
478
|
+
else
|
|
479
|
+
# SageMaker async invocation: upload payload to S3, invoke async, poll for result
|
|
480
|
+
echo " Uploading test payload to S3..."
|
|
481
|
+
|
|
482
|
+
# Create temporary file for payload
|
|
483
|
+
TEMP_PAYLOAD=$(mktemp)
|
|
484
|
+
echo "${TEST_PAYLOAD}" > "${TEMP_PAYLOAD}"
|
|
485
|
+
|
|
486
|
+
# Upload payload to S3 input location
|
|
487
|
+
ASYNC_INPUT_KEY="${PROJECT_NAME}/input/test-payload-$(date +%s).json"
|
|
488
|
+
ASYNC_S3_BUCKET=$(echo "${ASYNC_S3_OUTPUT_PATH}" | sed 's|s3://||' | cut -d'/' -f1)
|
|
489
|
+
S3_INPUT_LOCATION="s3://${ASYNC_S3_BUCKET}/${ASYNC_INPUT_KEY}"
|
|
490
|
+
|
|
491
|
+
if ! aws s3 cp "${TEMP_PAYLOAD}" "${S3_INPUT_LOCATION}" --region "${AWS_REGION}" &> /dev/null; then
|
|
492
|
+
echo "❌ Failed to upload test payload to S3"
|
|
493
|
+
echo " Location: ${S3_INPUT_LOCATION}"
|
|
494
|
+
echo " Check that your IAM credentials have s3:PutObject permission"
|
|
495
|
+
rm -f "${TEMP_PAYLOAD}"
|
|
496
|
+
exit 1
|
|
497
|
+
fi
|
|
498
|
+
rm -f "${TEMP_PAYLOAD}"
|
|
499
|
+
echo "✅ Test payload uploaded to: ${S3_INPUT_LOCATION}"
|
|
500
|
+
|
|
501
|
+
# Invoke endpoint asynchronously (no inference components for async)
|
|
502
|
+
echo " Invoking async endpoint..."
|
|
503
|
+
|
|
504
|
+
INVOKE_ARGS=(
|
|
505
|
+
--endpoint-name "${ENDPOINT_NAME}"
|
|
506
|
+
--input-location "${S3_INPUT_LOCATION}"
|
|
507
|
+
--region "${AWS_REGION}"
|
|
508
|
+
--content-type "application/json"
|
|
509
|
+
)
|
|
510
|
+
|
|
511
|
+
if ! INVOKE_RESULT=$(aws sagemaker-runtime invoke-endpoint-async \
|
|
512
|
+
"${INVOKE_ARGS[@]}" 2>&1); then
|
|
513
|
+
echo "❌ Async invocation failed"
|
|
514
|
+
echo " Error: ${INVOKE_RESULT}"
|
|
515
|
+
exit 1
|
|
516
|
+
fi
|
|
517
|
+
|
|
518
|
+
# ------------------------------------------------------------------
# Async result handling: pull the S3 OutputLocation out of the
# InvokeEndpointAsync response, then poll S3 until the result object
# appears or POLL_TIMEOUT elapses.
# Reads: INVOKE_RESULT, ASYNC_S3_OUTPUT_PATH, AWS_REGION, FRAMEWORK,
#        SCRIPT_DIR, ENDPOINT_NAME (set earlier in this script).
# ------------------------------------------------------------------
# Extract output location from response
OUTPUT_LOCATION=$(echo "${INVOKE_RESULT}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('OutputLocation',''))" 2>/dev/null || echo "")

if [ -z "${OUTPUT_LOCATION}" ]; then
  # Request accepted but nothing to poll; point at the configured prefix.
  echo "⚠️ Async invocation accepted but no output location returned"
  echo " Check the S3 output path for results: ${ASYNC_S3_OUTPUT_PATH}"
  echo ""
  echo "✅ Async invocation submitted successfully"
else
  echo "✅ Async invocation accepted"
  echo " Output location: ${OUTPUT_LOCATION}"

  # Poll S3 output location for result
  POLL_TIMEOUT=300
  POLL_INTERVAL=10
  ELAPSED=0

  echo "⏳ Polling for async result (timeout: ${POLL_TIMEOUT}s)..."

  while [ "${ELAPSED}" -lt "${POLL_TIMEOUT}" ]; do
    if aws s3 ls "${OUTPUT_LOCATION}" --region "${AWS_REGION}" &> /dev/null; then
      echo "✅ Async inference result available"

      # Download and display result
      TEMP_RESULT=$(mktemp)
      if aws s3 cp "${OUTPUT_LOCATION}" "${TEMP_RESULT}" --region "${AWS_REGION}" &> /dev/null; then
        RESPONSE_BODY=$(cat "${TEMP_RESULT}")
        rm -f "${TEMP_RESULT}"

        echo " Response preview: ${RESPONSE_BODY:0:200}"
        if [ ${#RESPONSE_BODY} -gt 200 ]; then
          echo " (truncated, full response is ${#RESPONSE_BODY} characters)"
        fi

        # For diffusors, extract and save the generated image.
        # FIX: the output path is passed as argv[1] instead of being
        # interpolated into the Python source — a path containing a
        # quote/backslash would otherwise break (or inject into) the
        # generated code.
        if [ "${FRAMEWORK}" = "diffusors" ]; then
          OUTPUT_IMAGE="${SCRIPT_DIR}/../test_output.png"
          if echo "${RESPONSE_BODY}" | python3 -c "
import sys, json, base64
resp = json.load(sys.stdin)
if 'data' in resp and len(resp['data']) > 0 and 'b64_json' in resp['data'][0]:
    img_data = base64.b64decode(resp['data'][0]['b64_json'])
    with open(sys.argv[1], 'wb') as f:
        f.write(img_data)
    print('ok')
else:
    print('skip')
" "${OUTPUT_IMAGE}" 2>/dev/null | grep -q "ok"; then
            echo " 🖼️ Image saved to: ${OUTPUT_IMAGE}"
          fi
        fi
      else
        # Best-effort: object exists but the copy failed; keep going so
        # the user still gets the output-location hints below.
        rm -f "${TEMP_RESULT}"
        echo "⚠️ Result exists but could not be downloaded"
      fi
      break
    fi

    sleep "${POLL_INTERVAL}"
    ELAPSED=$((ELAPSED + POLL_INTERVAL))
    echo " ⏳ Waiting... (${ELAPSED}s / ${POLL_TIMEOUT}s)"
  done

  # If the loop ran to completion the result never appeared.
  if [ "${ELAPSED}" -ge "${POLL_TIMEOUT}" ]; then
    echo "❌ Async inference timed out after ${POLL_TIMEOUT}s"
    echo ""
    echo " The request may still be processing. Check:"
    echo " • S3 output path: ${OUTPUT_LOCATION}"
    echo " • CloudWatch Logs: /aws/sagemaker/Endpoints/${ENDPOINT_NAME}"
    echo " • Endpoint status: aws sagemaker describe-endpoint --endpoint-name ${ENDPOINT_NAME} --region ${AWS_REGION}"
    exit 1
  fi
fi
|
|
591
|
+
fi
|
|
592
|
+
|
|
593
|
+
# Final summary: report overall success, then suggest follow-up actions
# depending on whether we tested the local container or the deployed
# async endpoint.
printf '\n%s\n\n' "✅ All tests passed!"

if [ "${TEST_MODE}" = "local" ]; then
  cat <<EOF
Next steps:
 • Push to ECR: ./do/push
 • Deploy to SageMaker: ./do/deploy
EOF
else
  cat <<EOF
Async endpoint is ready for production use!
 • Endpoint name: ${ENDPOINT_NAME}
 • Region: ${AWS_REGION}
 • S3 output: ${ASYNC_S3_OUTPUT_PATH}

📝 Register this deployment:
 ./do/register
EOF
fi
|
|
610
|
+
|
|
611
|
+
<% } else if (deploymentTarget === 'hyperpod-eks') { %>
|
|
612
|
+
# ============================================================
# HyperPod EKS Testing
# ============================================================

# Parse arguments: local or hyperpod test mode
# Default to hyperpod if no argument given (deployment target is hyperpod-eks)
TEST_TARGET="${1:-hyperpod}"

case "${TEST_TARGET}" in
  local)
    # Exercise the container that ./do/run started on this machine.
    echo "🧪 Testing local container at localhost:8080"
    echo " Project: ${PROJECT_NAME}"
    echo " Framework: ${FRAMEWORK}"
    echo " Model server: ${MODEL_SERVER}"
    TARGET_URL="http://localhost:8080"
    ;;
  hyperpod)
    echo "🧪 Testing HyperPod EKS deployment"
    echo " Project: ${PROJECT_NAME}"
    echo " Framework: ${FRAMEWORK}"
    echo " Model server: ${MODEL_SERVER}"
    echo " Cluster: ${HYPERPOD_CLUSTER_NAME}"
    echo " Namespace: ${HYPERPOD_NAMESPACE}"
    echo " Region: ${AWS_REGION}"
    echo ""

    # Get kubeconfig for HyperPod cluster
    echo "🔑 Configuring kubectl for HyperPod cluster..."
    KUBECONFIG_PATH="${HOME}/.kube/hyperpod-${HYPERPOD_CLUSTER_NAME}"

    # Resolve the EKS cluster that backs this HyperPod cluster.
    EKS_CLUSTER_ARN=$(aws sagemaker describe-cluster \
      --cluster-name "${HYPERPOD_CLUSTER_NAME}" \
      --region "${AWS_REGION}" \
      --query "Orchestrator.Eks.ClusterArn" \
      --output text 2>&1) || {
      echo "❌ Failed to describe HyperPod cluster: ${HYPERPOD_CLUSTER_NAME}"
      # FIX: stderr was captured into the variable but never shown.
      echo " Error: ${EKS_CLUSTER_ARN}"
      echo ""
      echo " Check that:"
      echo " • The cluster name is correct"
      echo " • The cluster exists in region: ${AWS_REGION}"
      echo " • Your IAM user/role has permission to access the cluster"
      exit 4
    }

    # ARN looks like arn:aws:eks:region:acct:cluster/NAME — keep NAME.
    EKS_CLUSTER_NAME=$(echo "${EKS_CLUSTER_ARN}" | awk -F'/' '{print $NF}')

    if ! aws eks update-kubeconfig \
      --name "${EKS_CLUSTER_NAME}" \
      --region "${AWS_REGION}" \
      --kubeconfig "${KUBECONFIG_PATH}" 2>&1; then
      echo "❌ Failed to configure kubectl for EKS cluster: ${EKS_CLUSTER_NAME}"
      exit 4
    fi

    export KUBECONFIG="${KUBECONFIG_PATH}"

    # Verify cluster connectivity
    if ! kubectl cluster-info &> /dev/null; then
      echo "❌ Cannot connect to HyperPod cluster"
      exit 4
    fi
    echo "✅ Connected to HyperPod cluster"

    # Port-forward the service to a local port
    LOCAL_PORT=8080
    echo ""
    echo "🔌 Port-forwarding svc/${PROJECT_NAME} to localhost:${LOCAL_PORT}..."
    kubectl port-forward "svc/${PROJECT_NAME}" "${LOCAL_PORT}:8080" \
      -n "${HYPERPOD_NAMESPACE}" &
    PF_PID=$!

    # FIX: register cleanup BEFORE waiting, so the forwarder is killed
    # even if the script exits during the readiness wait (the original
    # set the trap only after sleeping, leaking the process on early exit).
    trap "kill ${PF_PID} 2>/dev/null || true" EXIT

    # FIX: wait until the tunnel actually accepts connections instead of
    # a fixed 'sleep 3' — kubectl takes a variable amount of time to
    # establish the forward, so a flat sleep races on slow networks.
    PF_READY=0
    for _ in 1 2 3 4 5 6 7 8 9 10; do
      if ! kill -0 "${PF_PID}" 2>/dev/null; then
        break # forwarder died; fall through to the error below
      fi
      # Any HTTP response (even non-200) means the tunnel is up.
      if curl -s -o /dev/null --max-time 2 "http://localhost:${LOCAL_PORT}/ping"; then
        PF_READY=1
        break
      fi
      sleep 1
    done

    if [ "${PF_READY}" -ne 1 ]; then
      echo "❌ Port-forward failed to start"
      echo ""
      echo " Check that:"
      echo " • The service exists: kubectl get svc ${PROJECT_NAME} -n ${HYPERPOD_NAMESPACE}"
      echo " • The deployment is running: kubectl get pods -n ${HYPERPOD_NAMESPACE}"
      exit 1
    fi
    echo "✅ Port-forward established"

    TARGET_URL="http://localhost:${LOCAL_PORT}"
    ;;
  *)
    echo "Usage: ./do/test [local|hyperpod]"
    echo ""
    echo "Test modes:"
    echo " local - Test local container at localhost:8080"
    echo " hyperpod - Test HyperPod EKS deployment via port-forward"
    exit 1
    ;;
esac
|
|
711
|
+
|
|
712
|
+
printf '\n'

# Test 1: liveness probe — GET /ping must answer HTTP 200.
echo "🔍 Test 1: Health check"
echo " Sending GET request to ${TARGET_URL}/ping"

# curl default method is GET; -w appends the status code as a final line.
if ! PING_RAW=$(curl -s -w "\n%{http_code}" "${TARGET_URL}/ping" 2>&1); then
  echo "❌ Health check failed: Could not connect"
  if [ "${TEST_TARGET}" = "local" ]; then
    echo " Make sure the container is running: ./do/run"
  else
    echo " Check that the port-forward is working and pods are running"
  fi
  exit 1
fi

# Split the trailing status-code line from the body.
HTTP_CODE=$(echo "${PING_RAW}" | tail -n1)
RESPONSE_BODY=$(echo "${PING_RAW}" | sed '$d')

if [ "${HTTP_CODE}" = "200" ]; then
  echo "✅ Health check passed (HTTP ${HTTP_CODE})"
else
  echo "❌ Health check failed (HTTP ${HTTP_CODE})"
  echo " Response: ${RESPONSE_BODY}"
  exit 1
fi

printf '\n'
|
|
740
|
+
|
|
741
|
+
# Test 2: Inference request (/invocations)
echo "🔍 Test 2: Inference request"

# Build a framework-specific sample payload for the request below.
case "${FRAMEWORK}" in
  sklearn|xgboost|tensorflow)
    # All three take the same JSON instances array; folded into one arm.
    TEST_PAYLOAD='{"instances": [[1.0, 2.0, 3.0, 4.0]]}'
    echo " Payload: Sample feature vector"
    ;;
  transformers)
    # Payload shape depends on the serving stack.
    case "${MODEL_SERVER}" in
      vllm|sglang)
        # OpenAI-compatible chat completions format. Artifact-path style
        # model references are mounted at /opt/ml/model in the container.
        VLLM_MODEL_NAME="${MODEL_NAME}"
        case "${MODEL_NAME}" in
          jumpstart://*|jumpstart-hub://*|s3://*|/opt/ml/*)
            VLLM_MODEL_NAME="/opt/ml/model"
            ;;
        esac
        printf -v TEST_PAYLOAD '{"model": "%s", "messages": [{"role": "user", "content": "What is machine learning?"}], "max_tokens": 50, "temperature": 0.7}' "${VLLM_MODEL_NAME}"
        echo " Payload: OpenAI-compatible chat completion request"
        echo " Model: ${VLLM_MODEL_NAME}"
        ;;
      *)
        # HuggingFace-style format for LMI, DJL, TensorRT-LLM
        TEST_PAYLOAD='{"inputs": "What is machine learning?", "parameters": {"max_new_tokens": 50, "temperature": 0.7}}'
        echo " Payload: HuggingFace-style text generation request"
        ;;
    esac
    ;;
  diffusors)
    # OpenAI DALL-E compatible image generation request
    TEST_PAYLOAD='{"prompt": "A white cat", "n": 1, "size": "512x512"}'
    echo " Payload: OpenAI DALL-E compatible image generation request"
    ;;
  *)
    echo "❌ Unknown framework: ${FRAMEWORK}"
    exit 3
    ;;
esac
|
|
786
|
+
|
|
787
|
+
# Dispatch the inference request. Diffusion models expose an OpenAI
# DALL-E style endpoint; everything else uses the SageMaker-standard
# /invocations route. Uses TEST_PAYLOAD/TARGET_URL set above.
if [ "${FRAMEWORK}" = "diffusors" ]; then
  echo " Sending POST request to ${TARGET_URL}/v1/images/generations"

  if ! INVOKE_RESPONSE=$(curl -s -m 120 -w "\n%{http_code}" -X POST "${TARGET_URL}/v1/images/generations" \
    -H "Content-Type: application/json" \
    -d "${TEST_PAYLOAD}" 2>&1); then
    echo "❌ Inference request failed: Could not connect"
    exit 1
  fi

  HTTP_CODE=$(echo "${INVOKE_RESPONSE}" | tail -n1)
  RESPONSE_BODY=$(echo "${INVOKE_RESPONSE}" | sed '$d')

  if [ "${HTTP_CODE}" = "200" ]; then
    # Validate response contains data array with b64_json
    if echo "${RESPONSE_BODY}" | python3 -c "
import sys, json
resp = json.load(sys.stdin)
assert 'data' in resp, 'Missing data array'
assert len(resp['data']) > 0, 'Empty data array'
assert 'b64_json' in resp['data'][0], 'Missing b64_json in data entry'
" 2>/dev/null; then
      echo "✅ Image generation successful (HTTP ${HTTP_CODE})"
      echo " Response contains valid data array with b64_json image"

      # Save generated image to file.
      # FIX: pass the target path as argv[1] instead of interpolating it
      # into the Python source — quotes in the path would break/inject.
      OUTPUT_IMAGE="${SCRIPT_DIR}/../test_output.png"
      if echo "${RESPONSE_BODY}" | python3 -c "
import sys, json, base64
resp = json.load(sys.stdin)
img_data = base64.b64decode(resp['data'][0]['b64_json'])
with open(sys.argv[1], 'wb') as f:
    f.write(img_data)
" "${OUTPUT_IMAGE}" 2>/dev/null; then
        echo " 🖼️ Image saved to: ${OUTPUT_IMAGE}"
      fi
    else
      echo "⚠️ Image generation returned HTTP ${HTTP_CODE} but response format unexpected"
      echo " Response preview: ${RESPONSE_BODY:0:200}"
    fi
  else
    echo "❌ Image generation failed (HTTP ${HTTP_CODE})"
    echo " Response: ${RESPONSE_BODY}"
    exit 1
  fi
else
  echo " Sending POST request to ${TARGET_URL}/invocations"

  # FIX: -m 120 — the original set no timeout here, so a wedged server
  # would hang this test forever; cap it like the diffusors path.
  if ! INVOKE_RESPONSE=$(curl -s -m 120 -w "\n%{http_code}" -X POST "${TARGET_URL}/invocations" \
    -H "Content-Type: application/json" \
    -d "${TEST_PAYLOAD}" 2>&1); then
    echo "❌ Inference request failed: Could not connect"
    exit 1
  fi

  HTTP_CODE=$(echo "${INVOKE_RESPONSE}" | tail -n1)
  RESPONSE_BODY=$(echo "${INVOKE_RESPONSE}" | sed '$d')

  if [ "${HTTP_CODE}" = "200" ]; then
    echo "✅ Inference request successful (HTTP ${HTTP_CODE})"
    echo " Response preview: ${RESPONSE_BODY:0:200}"
    if [ ${#RESPONSE_BODY} -gt 200 ]; then
      echo " (truncated, full response is ${#RESPONSE_BODY} characters)"
    fi
  else
    echo "❌ Inference request failed (HTTP ${HTTP_CODE})"
    echo " Response: ${RESPONSE_BODY}"
    exit 1
  fi
fi
|
|
857
|
+
|
|
858
|
+
# Final summary plus context-appropriate next steps.
printf '\n%s\n\n' "✅ All tests passed!"

if [ "${TEST_TARGET}" = "local" ]; then
  cat <<EOF
Next steps:
 • Push to ECR: ./do/push
 • Deploy to HyperPod: ./do/deploy
EOF
else
  cat <<EOF
HyperPod deployment is ready for production use!
 • Cluster: ${HYPERPOD_CLUSTER_NAME}
 • Namespace: ${HYPERPOD_NAMESPACE}
 • Service: ${PROJECT_NAME}

📝 Register this deployment:
 ./do/register
EOF
fi
|
|
875
|
+
|
|
876
|
+
<% } else if (deploymentTarget === 'batch-transform') { %>
|
|
877
|
+
# ============================================================
# SageMaker Managed Inference - Batch Testing
# ============================================================

# Parse arguments: local or batch test mode
# Default to batch if no argument given (deployment target is batch-transform)
TEST_TARGET="${1:-batch}"

case "${TEST_TARGET}" in
  local)
    # Exercise the container that ./do/run started on this machine.
    echo "🧪 Testing local container at localhost:8080"
    echo " Project: ${PROJECT_NAME}"
    echo " Framework: ${FRAMEWORK}"
    echo " Model server: ${MODEL_SERVER}"
    TARGET_URL="http://localhost:8080"
    ;;
  batch)
    echo "🧪 Checking batch transform job status"
    echo " Project: ${PROJECT_NAME}"
    echo " Framework: ${FRAMEWORK}"
    echo " Region: ${AWS_REGION}"
    echo " S3 input: ${BATCH_INPUT_PATH}"
    echo " S3 output: ${BATCH_OUTPUT_PATH}"
    echo ""

    # Get transform job name from config (recorded by ./do/deploy).
    TRANSFORM_JOB_NAME="${TRANSFORM_JOB_NAME:-}"
    if [ -z "${TRANSFORM_JOB_NAME}" ]; then
      echo "❌ No transform job name found"
      echo " Run ./do/deploy first to create a transform job"
      exit 1
    fi

    echo "🔍 Checking transform job: ${TRANSFORM_JOB_NAME}"

    if ! JOB_STATUS_JSON=$(aws sagemaker describe-transform-job \
      --transform-job-name "${TRANSFORM_JOB_NAME}" \
      --region "${AWS_REGION}" 2>&1); then
      echo "❌ Failed to describe transform job"
      echo " Error: ${JOB_STATUS_JSON}"
      exit 1
    fi

    JOB_STATUS=$(echo "${JOB_STATUS_JSON}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('TransformJobStatus','Unknown'))" 2>/dev/null || echo "Unknown")

    case "${JOB_STATUS}" in
      Completed)
        echo "✅ Transform job completed successfully"
        echo ""

        # Download results locally
        LOCAL_OUTPUT_DIR="${SCRIPT_DIR}/../batch-output"
        mkdir -p "${LOCAL_OUTPUT_DIR}"
        echo "📥 Downloading results to ${LOCAL_OUTPUT_DIR}/"
        if aws s3 sync "${BATCH_OUTPUT_PATH}" "${LOCAL_OUTPUT_DIR}/" --region "${AWS_REGION}"; then
          # FIX: count regular files with find instead of parsing 'ls'
          # (ls breaks on odd names and also counted directories).
          DOWNLOADED=$(find "${LOCAL_OUTPUT_DIR}" -type f | wc -l | tr -d ' ')
          echo "✅ Downloaded ${DOWNLOADED} file(s) to ${LOCAL_OUTPUT_DIR}/"
          echo ""

          # Display first output file preview.
          # FIX: 'ls | head -1' could pick a directory, making the head
          # call below fail; restrict to regular files.
          FIRST_FILE=$(find "${LOCAL_OUTPUT_DIR}" -maxdepth 1 -type f | sort | head -1)
          if [ -n "${FIRST_FILE}" ]; then
            echo "📄 Sample output (${FIRST_FILE##*/}):"
            head -5 "${FIRST_FILE}"
            LINES=$(wc -l < "${FIRST_FILE}" | tr -d ' ')
            if [ "${LINES}" -gt 5 ]; then
              echo " ... (${LINES} total lines)"
            fi
          fi
        else
          echo "⚠️ Could not download output files"
        fi

        echo ""
        echo "✅ All tests passed!"
        echo ""
        echo "📝 Register this deployment:"
        echo " ./do/register"
        ;;
      InProgress)
        echo "⏳ Transform job is still in progress"

        # Extract progress details
        CREATION_TIME=$(echo "${JOB_STATUS_JSON}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('CreationTime','Unknown'))" 2>/dev/null || echo "Unknown")
        echo " Started: ${CREATION_TIME}"
        echo " Status: InProgress"
        echo ""
        echo " The job is still running. Check again later:"
        echo " ./do/test"
        echo ""
        echo " View logs:"
        echo " ./do/logs"
        ;;
      Failed)
        echo "❌ Transform job failed"

        FAILURE_REASON=$(echo "${JOB_STATUS_JSON}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('FailureReason','Unknown'))" 2>/dev/null || echo "Unknown")
        echo " Reason: ${FAILURE_REASON}"
        echo ""
        echo " View logs for more details:"
        echo " ./do/logs"
        exit 1
        ;;
      Stopped)
        echo "⚠️ Transform job was stopped"
        echo " The job was manually stopped before completion"
        echo ""
        echo " To start a new job, run:"
        echo " ./do/deploy"
        ;;
      *)
        echo "⚠️ Transform job status: ${JOB_STATUS}"
        echo " Check again later: ./do/test"
        ;;
    esac
    exit 0
    ;;
  *)
    echo "Usage: ./do/test [local|batch]"
    echo ""
    echo "Test modes:"
    echo " local - Test local container at localhost:8080"
    echo " batch - Check transform job status and view results"
    exit 1
    ;;
esac
|
|
1003
|
+
|
|
1004
|
+
printf '\n'

# Test 1: liveness probe — GET /ping must answer HTTP 200.
echo "🔍 Test 1: Health check"
echo " Sending GET request to ${TARGET_URL}/ping"

# curl default method is GET; -w appends the status code as a final line.
if ! PING_RAW=$(curl -s -w "\n%{http_code}" "${TARGET_URL}/ping" 2>&1); then
  echo "❌ Health check failed: Could not connect to local container"
  echo " Make sure the container is running: ./do/run"
  exit 1
fi

# Split the trailing status-code line from the body.
HTTP_CODE=$(echo "${PING_RAW}" | tail -n1)
RESPONSE_BODY=$(echo "${PING_RAW}" | sed '$d')

if [ "${HTTP_CODE}" = "200" ]; then
  echo "✅ Health check passed (HTTP ${HTTP_CODE})"
else
  echo "❌ Health check failed (HTTP ${HTTP_CODE})"
  echo " Response: ${RESPONSE_BODY}"
  exit 1
fi

printf '\n'
|
|
1028
|
+
|
|
1029
|
+
# Test 2: Inference request (/invocations)
echo "🔍 Test 2: Inference request"

# Build a framework-specific sample payload for the request below.
case "${FRAMEWORK}" in
  sklearn|xgboost|tensorflow)
    # All three take the same JSON instances array; folded into one arm.
    TEST_PAYLOAD='{"instances": [[1.0, 2.0, 3.0, 4.0]]}'
    echo " Payload: Sample feature vector"
    ;;
  transformers)
    # Payload shape depends on the serving stack.
    case "${MODEL_SERVER}" in
      vllm|sglang)
        # OpenAI-compatible chat completions format. Artifact-path style
        # model references are mounted at /opt/ml/model in the container.
        VLLM_MODEL_NAME="${MODEL_NAME}"
        case "${MODEL_NAME}" in
          jumpstart://*|jumpstart-hub://*|s3://*|/opt/ml/*)
            VLLM_MODEL_NAME="/opt/ml/model"
            ;;
        esac
        printf -v TEST_PAYLOAD '{"model": "%s", "messages": [{"role": "user", "content": "What is machine learning?"}], "max_tokens": 50, "temperature": 0.7}' "${VLLM_MODEL_NAME}"
        echo " Payload: OpenAI-compatible chat completion request"
        echo " Model: ${VLLM_MODEL_NAME}"
        ;;
      *)
        # HuggingFace-style format for LMI, DJL, TensorRT-LLM
        TEST_PAYLOAD='{"inputs": "What is machine learning?", "parameters": {"max_new_tokens": 50, "temperature": 0.7}}'
        echo " Payload: HuggingFace-style text generation request"
        ;;
    esac
    ;;
  diffusors)
    # OpenAI DALL-E compatible image generation request
    TEST_PAYLOAD='{"prompt": "A white cat", "n": 1, "size": "512x512"}'
    echo " Payload: OpenAI DALL-E compatible image generation request"
    ;;
  *)
    echo "❌ Unknown framework: ${FRAMEWORK}"
    exit 3
    ;;
esac
|
|
1068
|
+
|
|
1069
|
+
# Dispatch the inference request against the local container. Diffusion
# models expose an OpenAI DALL-E style endpoint; everything else uses
# the SageMaker-standard /invocations route.
if [ "${FRAMEWORK}" = "diffusors" ]; then
  echo " Sending POST request to ${TARGET_URL}/v1/images/generations"

  if ! INVOKE_RESPONSE=$(curl -s -m 120 -w "\n%{http_code}" -X POST "${TARGET_URL}/v1/images/generations" \
    -H "Content-Type: application/json" \
    -d "${TEST_PAYLOAD}" 2>&1); then
    echo "❌ Inference request failed: Could not connect to local container"
    exit 1
  fi

  HTTP_CODE=$(echo "${INVOKE_RESPONSE}" | tail -n1)
  RESPONSE_BODY=$(echo "${INVOKE_RESPONSE}" | sed '$d')

  if [ "${HTTP_CODE}" = "200" ]; then
    # Validate response contains a data array with a b64_json entry.
    if echo "${RESPONSE_BODY}" | python3 -c "
import sys, json
resp = json.load(sys.stdin)
assert 'data' in resp, 'Missing data array'
assert len(resp['data']) > 0, 'Empty data array'
assert 'b64_json' in resp['data'][0], 'Missing b64_json in data entry'
" 2>/dev/null; then
      echo "✅ Image generation successful (HTTP ${HTTP_CODE})"
      echo " Response contains valid data array with b64_json image"

      # Save generated image to file.
      # FIX: pass the target path as argv[1] instead of interpolating it
      # into the Python source — quotes in the path would break/inject.
      OUTPUT_IMAGE="${SCRIPT_DIR}/../test_output.png"
      if echo "${RESPONSE_BODY}" | python3 -c "
import sys, json, base64
resp = json.load(sys.stdin)
img_data = base64.b64decode(resp['data'][0]['b64_json'])
with open(sys.argv[1], 'wb') as f:
    f.write(img_data)
" "${OUTPUT_IMAGE}" 2>/dev/null; then
        echo " 🖼️ Image saved to: ${OUTPUT_IMAGE}"
      fi
    else
      echo "⚠️ Image generation returned HTTP ${HTTP_CODE} but response format unexpected"
      echo " Response preview: ${RESPONSE_BODY:0:200}"
    fi
  else
    echo "❌ Image generation failed (HTTP ${HTTP_CODE})"
    echo " Response: ${RESPONSE_BODY}"
    exit 1
  fi
else
  echo " Sending POST request to ${TARGET_URL}/invocations"

  # FIX: -m 120 — the original set no timeout here, so a wedged server
  # would hang this test forever; cap it like the diffusors path.
  if ! INVOKE_RESPONSE=$(curl -s -m 120 -w "\n%{http_code}" -X POST "${TARGET_URL}/invocations" \
    -H "Content-Type: application/json" \
    -d "${TEST_PAYLOAD}" 2>&1); then
    echo "❌ Inference request failed: Could not connect to local container"
    exit 1
  fi

  HTTP_CODE=$(echo "${INVOKE_RESPONSE}" | tail -n1)
  RESPONSE_BODY=$(echo "${INVOKE_RESPONSE}" | sed '$d')

  if [ "${HTTP_CODE}" = "200" ]; then
    echo "✅ Inference request successful (HTTP ${HTTP_CODE})"
    echo " Response preview: ${RESPONSE_BODY:0:200}"
    if [ ${#RESPONSE_BODY} -gt 200 ]; then
      echo " (truncated, full response is ${#RESPONSE_BODY} characters)"
    fi
  else
    echo "❌ Inference request failed (HTTP ${HTTP_CODE})"
    echo " Response: ${RESPONSE_BODY}"
    exit 1
  fi
fi
|
|
1138
|
+
|
|
1139
|
+
# Final summary: local batch test passed; point at the next pipeline steps.
printf '\n%s\n\n' "✅ All tests passed!"

cat <<EOF
Next steps:
 • Push to ECR: ./do/push
 • Deploy batch transform: ./do/deploy
EOF
|
|
1146
|
+
|
|
1147
|
+
<% } %>
|