@aws/ml-container-creator 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143)
  1. package/LICENSE +202 -0
  2. package/LICENSE-THIRD-PARTY +68620 -0
  3. package/NOTICE +2 -0
  4. package/README.md +106 -0
  5. package/bin/cli.js +365 -0
  6. package/config/defaults.json +32 -0
  7. package/config/presets/transformers-djl.json +26 -0
  8. package/config/presets/transformers-gpu.json +24 -0
  9. package/config/presets/transformers-lmi.json +27 -0
  10. package/package.json +129 -0
  11. package/servers/README.md +419 -0
  12. package/servers/base-image-picker/catalogs/model-servers.json +1191 -0
  13. package/servers/base-image-picker/catalogs/python-slim.json +38 -0
  14. package/servers/base-image-picker/catalogs/triton-backends.json +51 -0
  15. package/servers/base-image-picker/catalogs/triton.json +38 -0
  16. package/servers/base-image-picker/index.js +495 -0
  17. package/servers/base-image-picker/manifest.json +17 -0
  18. package/servers/base-image-picker/package.json +15 -0
  19. package/servers/hyperpod-cluster-picker/LICENSE +202 -0
  20. package/servers/hyperpod-cluster-picker/index.js +424 -0
  21. package/servers/hyperpod-cluster-picker/manifest.json +14 -0
  22. package/servers/hyperpod-cluster-picker/package.json +17 -0
  23. package/servers/instance-recommender/LICENSE +202 -0
  24. package/servers/instance-recommender/catalogs/instances.json +852 -0
  25. package/servers/instance-recommender/index.js +284 -0
  26. package/servers/instance-recommender/manifest.json +16 -0
  27. package/servers/instance-recommender/package.json +15 -0
  28. package/servers/lib/LICENSE +202 -0
  29. package/servers/lib/bedrock-client.js +160 -0
  30. package/servers/lib/custom-validators.js +46 -0
  31. package/servers/lib/dynamic-resolver.js +36 -0
  32. package/servers/lib/package.json +11 -0
  33. package/servers/lib/schemas/image-catalog.schema.json +185 -0
  34. package/servers/lib/schemas/instances.schema.json +124 -0
  35. package/servers/lib/schemas/manifest.schema.json +64 -0
  36. package/servers/lib/schemas/model-catalog.schema.json +91 -0
  37. package/servers/lib/schemas/regions.schema.json +26 -0
  38. package/servers/lib/schemas/triton-backends.schema.json +51 -0
  39. package/servers/model-picker/catalogs/jumpstart-public.json +66 -0
  40. package/servers/model-picker/catalogs/popular-diffusors.json +88 -0
  41. package/servers/model-picker/catalogs/popular-transformers.json +226 -0
  42. package/servers/model-picker/index.js +1693 -0
  43. package/servers/model-picker/manifest.json +18 -0
  44. package/servers/model-picker/package.json +20 -0
  45. package/servers/region-picker/LICENSE +202 -0
  46. package/servers/region-picker/catalogs/regions.json +263 -0
  47. package/servers/region-picker/index.js +230 -0
  48. package/servers/region-picker/manifest.json +16 -0
  49. package/servers/region-picker/package.json +15 -0
  50. package/src/app.js +1007 -0
  51. package/src/copy-tpl.js +77 -0
  52. package/src/lib/accelerator-validator.js +39 -0
  53. package/src/lib/asset-manager.js +385 -0
  54. package/src/lib/aws-profile-parser.js +181 -0
  55. package/src/lib/bootstrap-command-handler.js +1647 -0
  56. package/src/lib/bootstrap-config.js +238 -0
  57. package/src/lib/ci-register-helpers.js +124 -0
  58. package/src/lib/ci-report-helpers.js +158 -0
  59. package/src/lib/ci-stage-helpers.js +268 -0
  60. package/src/lib/cli-handler.js +529 -0
  61. package/src/lib/comment-generator.js +544 -0
  62. package/src/lib/community-reports-validator.js +91 -0
  63. package/src/lib/config-manager.js +2106 -0
  64. package/src/lib/configuration-exporter.js +204 -0
  65. package/src/lib/configuration-manager.js +695 -0
  66. package/src/lib/configuration-matcher.js +221 -0
  67. package/src/lib/cpu-validator.js +36 -0
  68. package/src/lib/cuda-validator.js +57 -0
  69. package/src/lib/deployment-config-resolver.js +103 -0
  70. package/src/lib/deployment-entry-schema.js +125 -0
  71. package/src/lib/deployment-registry.js +598 -0
  72. package/src/lib/docker-introspection-validator.js +51 -0
  73. package/src/lib/engine-prefix-resolver.js +60 -0
  74. package/src/lib/huggingface-client.js +172 -0
  75. package/src/lib/key-value-parser.js +37 -0
  76. package/src/lib/known-flags-validator.js +200 -0
  77. package/src/lib/manifest-cli.js +280 -0
  78. package/src/lib/mcp-client.js +303 -0
  79. package/src/lib/mcp-command-handler.js +532 -0
  80. package/src/lib/neuron-validator.js +80 -0
  81. package/src/lib/parameter-schema-validator.js +284 -0
  82. package/src/lib/prompt-runner.js +1349 -0
  83. package/src/lib/prompts.js +1138 -0
  84. package/src/lib/registry-command-handler.js +519 -0
  85. package/src/lib/registry-loader.js +198 -0
  86. package/src/lib/rocm-validator.js +80 -0
  87. package/src/lib/schema-validator.js +157 -0
  88. package/src/lib/sensitive-redactor.js +59 -0
  89. package/src/lib/template-engine.js +156 -0
  90. package/src/lib/template-manager.js +341 -0
  91. package/src/lib/validation-engine.js +314 -0
  92. package/src/prompt-adapter.js +63 -0
  93. package/templates/Dockerfile +300 -0
  94. package/templates/IAM_PERMISSIONS.md +84 -0
  95. package/templates/MIGRATION.md +488 -0
  96. package/templates/PROJECT_README.md +439 -0
  97. package/templates/TEMPLATE_SYSTEM.md +243 -0
  98. package/templates/buildspec.yml +64 -0
  99. package/templates/code/chat_template.jinja +1 -0
  100. package/templates/code/flask/gunicorn_config.py +35 -0
  101. package/templates/code/flask/wsgi.py +10 -0
  102. package/templates/code/model_handler.py +387 -0
  103. package/templates/code/serve +300 -0
  104. package/templates/code/serve.py +175 -0
  105. package/templates/code/serving.properties +105 -0
  106. package/templates/code/start_server.py +39 -0
  107. package/templates/code/start_server.sh +39 -0
  108. package/templates/diffusors/Dockerfile +72 -0
  109. package/templates/diffusors/patch_image_api.py +35 -0
  110. package/templates/diffusors/serve +115 -0
  111. package/templates/diffusors/start_server.sh +114 -0
  112. package/templates/do/.gitkeep +1 -0
  113. package/templates/do/README.md +541 -0
  114. package/templates/do/build +83 -0
  115. package/templates/do/ci +681 -0
  116. package/templates/do/clean +811 -0
  117. package/templates/do/config +260 -0
  118. package/templates/do/deploy +1560 -0
  119. package/templates/do/export +306 -0
  120. package/templates/do/logs +319 -0
  121. package/templates/do/manifest +12 -0
  122. package/templates/do/push +119 -0
  123. package/templates/do/register +580 -0
  124. package/templates/do/run +113 -0
  125. package/templates/do/submit +417 -0
  126. package/templates/do/test +1147 -0
  127. package/templates/hyperpod/configmap.yaml +24 -0
  128. package/templates/hyperpod/deployment.yaml +71 -0
  129. package/templates/hyperpod/pvc.yaml +42 -0
  130. package/templates/hyperpod/service.yaml +17 -0
  131. package/templates/nginx-diffusors.conf +74 -0
  132. package/templates/nginx-predictors.conf +47 -0
  133. package/templates/nginx-tensorrt.conf +74 -0
  134. package/templates/requirements.txt +61 -0
  135. package/templates/sample_model/test_inference.py +123 -0
  136. package/templates/sample_model/train_abalone.py +252 -0
  137. package/templates/test/test_endpoint.sh +79 -0
  138. package/templates/test/test_local_image.sh +80 -0
  139. package/templates/test/test_model_handler.py +180 -0
  140. package/templates/triton/Dockerfile +128 -0
  141. package/templates/triton/config.pbtxt +163 -0
  142. package/templates/triton/model.py +130 -0
  143. package/templates/triton/requirements.txt +11 -0
@@ -0,0 +1,1147 @@
1
+ #!/bin/bash
2
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ set -e
6
+ set -u
7
+ set -o pipefail
8
+
9
+ # Source configuration
10
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
11
+ source "${SCRIPT_DIR}/config"
12
+
13
+ <% if (deploymentTarget === 'managed-inference') { %>
14
+ # ============================================================
15
+ # SageMaker Managed Inference Testing
16
+ # ============================================================
17
+
18
+ # Parse arguments
19
+ ENDPOINT_NAME="${1:-${ENDPOINT_NAME:-}}"
20
+
21
+ if [ -z "${ENDPOINT_NAME}" ]; then
22
+ echo "🧪 Testing local container at localhost:8080"
23
+ echo " Project: ${PROJECT_NAME}"
24
+ echo " Framework: ${FRAMEWORK}"
25
+ echo " Model server: ${MODEL_SERVER}"
26
+ TARGET_URL="http://localhost:8080"
27
+ TEST_MODE="local"
28
+ else
29
+ echo "🧪 Testing SageMaker endpoint: ${ENDPOINT_NAME}"
30
+ echo " Project: ${PROJECT_NAME}"
31
+ echo " Framework: ${FRAMEWORK}"
32
+ echo " Model server: ${MODEL_SERVER}"
33
+ echo " Region: ${AWS_REGION}"
34
+ TEST_MODE="sagemaker"
35
+ fi
36
+
37
+ echo ""
38
+
39
+ # Test 1: Health check (/ping)
40
+ echo "🔍 Test 1: Health check"
41
+ if [ "${TEST_MODE}" = "local" ]; then
42
+ echo " Sending GET request to ${TARGET_URL}/ping"
43
+
44
+ if ! PING_RESPONSE=$(curl -s -w "\n%{http_code}" -X GET "${TARGET_URL}/ping" 2>&1); then
45
+ echo "❌ Health check failed: Could not connect to local container"
46
+ echo " Make sure the container is running: ./do/run"
47
+ exit 1
48
+ fi
49
+
50
+ HTTP_CODE=$(echo "${PING_RESPONSE}" | tail -n1)
51
+ RESPONSE_BODY=$(echo "${PING_RESPONSE}" | sed '$d')
52
+
53
+ if [ "${HTTP_CODE}" = "200" ]; then
54
+ echo "✅ Health check passed (HTTP ${HTTP_CODE})"
55
+ else
56
+ echo "❌ Health check failed (HTTP ${HTTP_CODE})"
57
+ echo " Response: ${RESPONSE_BODY}"
58
+ exit 1
59
+ fi
60
+ else
61
+ # For SageMaker endpoints, /ping is not directly accessible
62
+ # We'll verify the endpoint exists and is InService
63
+ echo " Checking endpoint status..."
64
+
65
+ if ! ENDPOINT_STATUS=$(aws sagemaker describe-endpoint \
66
+ --endpoint-name "${ENDPOINT_NAME}" \
67
+ --region "${AWS_REGION}" \
68
+ --query 'EndpointStatus' \
69
+ --output text 2>&1); then
70
+ echo "❌ Endpoint not found or not accessible"
71
+ echo " Error: ${ENDPOINT_STATUS}"
72
+ exit 1
73
+ fi
74
+
75
+ if [ "${ENDPOINT_STATUS}" = "InService" ]; then
76
+ echo "✅ Endpoint is InService"
77
+ else
78
+ echo "❌ Endpoint is not InService (Status: ${ENDPOINT_STATUS})"
79
+ exit 1
80
+ fi
81
+ fi
82
+
83
+ echo ""
84
+
85
+ # Test 2: Inference request (/invocations)
86
+ echo "🔍 Test 2: Inference request"
87
+
88
+ # Create framework-specific test payload
89
+ case "${FRAMEWORK}" in
90
+ sklearn|xgboost)
91
+ # Traditional ML: JSON with instances array
92
+ TEST_PAYLOAD='{"instances": [[1.0, 2.0, 3.0, 4.0]]}'
93
+ echo " Payload: Sample feature vector"
94
+ ;;
95
+ tensorflow)
96
+ # TensorFlow: JSON with instances array
97
+ TEST_PAYLOAD='{"instances": [[1.0, 2.0, 3.0, 4.0]]}'
98
+ echo " Payload: Sample feature vector"
99
+ ;;
100
+ transformers)
101
+ # Transformers: payload format depends on model server
102
+ case "${MODEL_SERVER}" in
103
+ vllm|sglang)
104
+ # OpenAI-compatible chat completions format
105
+ # For S3/JumpStart models, vLLM registers the model under the local path
106
+ VLLM_MODEL_NAME="${MODEL_NAME}"
107
+ if [[ "${MODEL_NAME}" == jumpstart://* ]] || [[ "${MODEL_NAME}" == jumpstart-hub://* ]] || [[ "${MODEL_NAME}" == s3://* ]] || [[ "${MODEL_NAME}" == /opt/ml/* ]]; then
108
+ VLLM_MODEL_NAME="/opt/ml/model"
109
+ fi
110
+ TEST_PAYLOAD='{"model": "'"${VLLM_MODEL_NAME}"'", "messages": [{"role": "user", "content": "What is machine learning?"}], "max_tokens": 50, "temperature": 0.7}'
111
+ echo " Payload: OpenAI-compatible chat completion request"
112
+ echo " Model: ${VLLM_MODEL_NAME}"
113
+ ;;
114
+ *)
115
+ # HuggingFace-style format for LMI, DJL, TensorRT-LLM
116
+ TEST_PAYLOAD='{"inputs": "What is machine learning?", "parameters": {"max_new_tokens": 50, "temperature": 0.7}}'
117
+ echo " Payload: HuggingFace-style text generation request"
118
+ ;;
119
+ esac
120
+ ;;
121
+ diffusors)
122
+ # Diffusors: OpenAI DALL-E compatible image generation request
123
+ TEST_PAYLOAD='{"prompt": "A white cat", "n": 1, "size": "512x512"}'
124
+ echo " Payload: OpenAI DALL-E compatible image generation request"
125
+ ;;
126
+ *)
127
+ echo "❌ Unknown framework: ${FRAMEWORK}"
128
+ exit 3
129
+ ;;
130
+ esac
131
+
132
+ if [ "${TEST_MODE}" = "local" ]; then
133
+ if [ "${FRAMEWORK}" = "diffusors" ]; then
134
+ echo " Sending POST request to ${TARGET_URL}/v1/images/generations"
135
+
136
+ if ! INVOKE_RESPONSE=$(curl -s -m 120 -w "\n%{http_code}" -X POST "${TARGET_URL}/v1/images/generations" \
137
+ -H "Content-Type: application/json" \
138
+ -d "${TEST_PAYLOAD}" 2>&1); then
139
+ echo "❌ Inference request failed: Could not connect to local container"
140
+ exit 1
141
+ fi
142
+
143
+ HTTP_CODE=$(echo "${INVOKE_RESPONSE}" | tail -n1)
144
+ RESPONSE_BODY=$(echo "${INVOKE_RESPONSE}" | sed '$d')
145
+
146
+ if [ "${HTTP_CODE}" = "200" ]; then
147
+ # Validate response contains data array with b64_json
148
+ if echo "${RESPONSE_BODY}" | python3 -c "
149
+ import sys, json
150
+ resp = json.load(sys.stdin)
151
+ assert 'data' in resp, 'Missing data array'
152
+ assert len(resp['data']) > 0, 'Empty data array'
153
+ assert 'b64_json' in resp['data'][0], 'Missing b64_json in data entry'
154
+ " 2>/dev/null; then
155
+ echo "✅ Image generation successful (HTTP ${HTTP_CODE})"
156
+ echo " Response contains valid data array with b64_json image"
157
+
158
+ # Save generated image to file
159
+ OUTPUT_IMAGE="${SCRIPT_DIR}/../test_output.png"
160
+ if echo "${RESPONSE_BODY}" | python3 -c "
161
+ import sys, json, base64
162
+ resp = json.load(sys.stdin)
163
+ img_data = base64.b64decode(resp['data'][0]['b64_json'])
164
+ with open('${OUTPUT_IMAGE}', 'wb') as f:
165
+ f.write(img_data)
166
+ " 2>/dev/null; then
167
+ echo " 🖼️ Image saved to: ${OUTPUT_IMAGE}"
168
+ fi
169
+ else
170
+ echo "⚠️ Image generation returned HTTP ${HTTP_CODE} but response format unexpected"
171
+ echo " Response preview: ${RESPONSE_BODY:0:200}"
172
+ fi
173
+ else
174
+ echo "❌ Image generation failed (HTTP ${HTTP_CODE})"
175
+ echo " Response: ${RESPONSE_BODY}"
176
+ exit 1
177
+ fi
178
+ else
179
+ echo " Sending POST request to ${TARGET_URL}/invocations"
180
+
181
+ if ! INVOKE_RESPONSE=$(curl -s -w "\n%{http_code}" -X POST "${TARGET_URL}/invocations" \
182
+ -H "Content-Type: application/json" \
183
+ -d "${TEST_PAYLOAD}" 2>&1); then
184
+ echo "❌ Inference request failed: Could not connect to local container"
185
+ exit 1
186
+ fi
187
+
188
+ HTTP_CODE=$(echo "${INVOKE_RESPONSE}" | tail -n1)
189
+ RESPONSE_BODY=$(echo "${INVOKE_RESPONSE}" | sed '$d')
190
+
191
+ if [ "${HTTP_CODE}" = "200" ]; then
192
+ echo "✅ Inference request successful (HTTP ${HTTP_CODE})"
193
+ echo " Response preview: ${RESPONSE_BODY:0:200}"
194
+ if [ ${#RESPONSE_BODY} -gt 200 ]; then
195
+ echo " (truncated, full response is ${#RESPONSE_BODY} characters)"
196
+ fi
197
+ else
198
+ echo "❌ Inference request failed (HTTP ${HTTP_CODE})"
199
+ echo " Response: ${RESPONSE_BODY}"
200
+ exit 1
201
+ fi
202
+ fi
203
+ else
204
+ echo " Invoking SageMaker endpoint..."
205
+
206
+ # Create temporary file for payload
207
+ TEMP_PAYLOAD=$(mktemp)
208
+ echo "${TEST_PAYLOAD}" > "${TEMP_PAYLOAD}"
209
+
210
+ # Create temporary file for response
211
+ TEMP_RESPONSE=$(mktemp)
212
+
213
+ # Invoke endpoint via inference component
214
+ IC_NAME="${INFERENCE_COMPONENT_NAME:-}"
215
+ INVOKE_ARGS=(
216
+ --endpoint-name "${ENDPOINT_NAME}"
217
+ --region "${AWS_REGION}"
218
+ --content-type "application/json"
219
+ --body "fileb://${TEMP_PAYLOAD}"
220
+ )
221
+ if [ -n "${IC_NAME}" ]; then
222
+ INVOKE_ARGS+=(--inference-component-name "${IC_NAME}")
223
+ echo " Inference component: ${IC_NAME}"
224
+ fi
225
+
226
+ INVOKE_ERROR=$(mktemp)
227
+ if ! aws sagemaker-runtime invoke-endpoint \
228
+ "${INVOKE_ARGS[@]}" \
229
+ "${TEMP_RESPONSE}" 2>"${INVOKE_ERROR}"; then
230
+ echo "❌ Inference request failed"
231
+ echo " Error: $(cat "${INVOKE_ERROR}")"
232
+ rm -f "${TEMP_PAYLOAD}" "${TEMP_RESPONSE}" "${INVOKE_ERROR}"
233
+ exit 1
234
+ fi
235
+ rm -f "${INVOKE_ERROR}"
236
+
237
+ # Read response
238
+ RESPONSE_BODY=$(cat "${TEMP_RESPONSE}")
239
+
240
+ # Clean up temp files
241
+ rm -f "${TEMP_PAYLOAD}" "${TEMP_RESPONSE}"
242
+
243
+ if [ "${FRAMEWORK}" = "diffusors" ]; then
244
+ # Validate response contains data array with b64_json
245
+ if echo "${RESPONSE_BODY}" | python3 -c "
246
+ import sys, json
247
+ resp = json.load(sys.stdin)
248
+ assert 'data' in resp, 'Missing data array'
249
+ assert len(resp['data']) > 0, 'Empty data array'
250
+ assert 'b64_json' in resp['data'][0], 'Missing b64_json in data entry'
251
+ " 2>/dev/null; then
252
+ echo "✅ Image generation successful"
253
+ echo " Response contains valid data array with b64_json image"
254
+
255
+ # Save generated image to file
256
+ OUTPUT_IMAGE="${SCRIPT_DIR}/../test_output.png"
257
+ if echo "${RESPONSE_BODY}" | python3 -c "
258
+ import sys, json, base64
259
+ resp = json.load(sys.stdin)
260
+ img_data = base64.b64decode(resp['data'][0]['b64_json'])
261
+ with open('${OUTPUT_IMAGE}', 'wb') as f:
262
+ f.write(img_data)
263
+ " 2>/dev/null; then
264
+ echo " 🖼️ Image saved to: ${OUTPUT_IMAGE}"
265
+ fi
266
+ else
267
+ echo "⚠️ Image generation returned but response format unexpected"
268
+ echo " Response preview: ${RESPONSE_BODY:0:200}"
269
+ fi
270
+ else
271
+ echo "✅ Inference request successful"
272
+ echo " Response preview: ${RESPONSE_BODY:0:200}"
273
+ if [ ${#RESPONSE_BODY} -gt 200 ]; then
274
+ echo " (truncated, full response is ${#RESPONSE_BODY} characters)"
275
+ fi
276
+ fi
277
+ fi
278
+
279
+ echo ""
280
+ echo "✅ All tests passed!"
281
+ echo ""
282
+
283
+ if [ "${TEST_MODE}" = "local" ]; then
284
+ echo "Next steps:"
285
+ echo " • Push to ECR: ./do/push"
286
+ echo " • Deploy to SageMaker: ./do/deploy"
287
+ else
288
+ echo "Endpoint is ready for production use!"
289
+ echo " • Endpoint name: ${ENDPOINT_NAME}"
290
+ echo " • Region: ${AWS_REGION}"
291
+ echo ""
292
+ echo "📝 Register this deployment:"
293
+ echo " ./do/register"
294
+ fi
295
+
296
+ <% } else if (deploymentTarget === 'async-inference') { %>
297
+ # ============================================================
298
+ # SageMaker Managed Inference - Async Testing
299
+ # ============================================================
300
+
301
+ # Parse arguments
302
+ ENDPOINT_NAME="${1:-${ENDPOINT_NAME:-}}"
303
+
304
+ if [ -z "${ENDPOINT_NAME}" ]; then
305
+ echo "🧪 Testing local container at localhost:8080"
306
+ echo " Project: ${PROJECT_NAME}"
307
+ echo " Framework: ${FRAMEWORK}"
308
+ echo " Model server: ${MODEL_SERVER}"
309
+ TARGET_URL="http://localhost:8080"
310
+ TEST_MODE="local"
311
+ else
312
+ echo "🧪 Testing SageMaker async endpoint: ${ENDPOINT_NAME}"
313
+ echo " Project: ${PROJECT_NAME}"
314
+ echo " Framework: ${FRAMEWORK}"
315
+ echo " Model server: ${MODEL_SERVER}"
316
+ echo " Region: ${AWS_REGION}"
317
+ echo " S3 output: ${ASYNC_S3_OUTPUT_PATH}"
318
+ TEST_MODE="sagemaker"
319
+ fi
320
+
321
+ echo ""
322
+
323
+ # Test 1: Health check (/ping)
324
+ echo "🔍 Test 1: Health check"
325
+ if [ "${TEST_MODE}" = "local" ]; then
326
+ echo " Sending GET request to ${TARGET_URL}/ping"
327
+
328
+ if ! PING_RESPONSE=$(curl -s -w "\n%{http_code}" -X GET "${TARGET_URL}/ping" 2>&1); then
329
+ echo "❌ Health check failed: Could not connect to local container"
330
+ echo " Make sure the container is running: ./do/run"
331
+ exit 1
332
+ fi
333
+
334
+ HTTP_CODE=$(echo "${PING_RESPONSE}" | tail -n1)
335
+ RESPONSE_BODY=$(echo "${PING_RESPONSE}" | sed '$d')
336
+
337
+ if [ "${HTTP_CODE}" = "200" ]; then
338
+ echo "✅ Health check passed (HTTP ${HTTP_CODE})"
339
+ else
340
+ echo "❌ Health check failed (HTTP ${HTTP_CODE})"
341
+ echo " Response: ${RESPONSE_BODY}"
342
+ exit 1
343
+ fi
344
+ else
345
+ # For SageMaker endpoints, verify the endpoint exists and is InService
346
+ echo " Checking endpoint status..."
347
+
348
+ if ! ENDPOINT_STATUS=$(aws sagemaker describe-endpoint \
349
+ --endpoint-name "${ENDPOINT_NAME}" \
350
+ --region "${AWS_REGION}" \
351
+ --query 'EndpointStatus' \
352
+ --output text 2>&1); then
353
+ echo "❌ Endpoint not found or not accessible"
354
+ echo " Error: ${ENDPOINT_STATUS}"
355
+ exit 1
356
+ fi
357
+
358
+ if [ "${ENDPOINT_STATUS}" = "InService" ]; then
359
+ echo "✅ Endpoint is InService"
360
+ else
361
+ echo "❌ Endpoint is not InService (Status: ${ENDPOINT_STATUS})"
362
+ exit 1
363
+ fi
364
+ fi
365
+
366
+ echo ""
367
+
368
+ # Test 2: Inference request
369
+ echo "🔍 Test 2: Inference request"
370
+
371
+ # Create framework-specific test payload
372
+ case "${FRAMEWORK}" in
373
+ sklearn|xgboost)
374
+ TEST_PAYLOAD='{"instances": [[1.0, 2.0, 3.0, 4.0]]}'
375
+ echo " Payload: Sample feature vector"
376
+ ;;
377
+ tensorflow)
378
+ TEST_PAYLOAD='{"instances": [[1.0, 2.0, 3.0, 4.0]]}'
379
+ echo " Payload: Sample feature vector"
380
+ ;;
381
+ transformers)
382
+ case "${MODEL_SERVER}" in
383
+ vllm|sglang)
384
+ VLLM_MODEL_NAME="${MODEL_NAME}"
385
+ if [[ "${MODEL_NAME}" == jumpstart://* ]] || [[ "${MODEL_NAME}" == jumpstart-hub://* ]] || [[ "${MODEL_NAME}" == s3://* ]] || [[ "${MODEL_NAME}" == /opt/ml/* ]]; then
386
+ VLLM_MODEL_NAME="/opt/ml/model"
387
+ fi
388
+ TEST_PAYLOAD='{"model": "'"${VLLM_MODEL_NAME}"'", "messages": [{"role": "user", "content": "What is machine learning?"}], "max_tokens": 50, "temperature": 0.7}'
389
+ echo " Payload: OpenAI-compatible chat completion request"
390
+ echo " Model: ${VLLM_MODEL_NAME}"
391
+ ;;
392
+ *)
393
+ TEST_PAYLOAD='{"inputs": "What is machine learning?", "parameters": {"max_new_tokens": 50, "temperature": 0.7}}'
394
+ echo " Payload: HuggingFace-style text generation request"
395
+ ;;
396
+ esac
397
+ ;;
398
+ diffusors)
399
+ TEST_PAYLOAD='{"prompt": "A white cat", "n": 1, "size": "512x512"}'
400
+ echo " Payload: OpenAI DALL-E compatible image generation request"
401
+ ;;
402
+ *)
403
+ echo "❌ Unknown framework: ${FRAMEWORK}"
404
+ exit 3
405
+ ;;
406
+ esac
407
+
408
+ if [ "${TEST_MODE}" = "local" ]; then
409
+ if [ "${FRAMEWORK}" = "diffusors" ]; then
410
+ echo " Sending POST request to ${TARGET_URL}/v1/images/generations"
411
+
412
+ if ! INVOKE_RESPONSE=$(curl -s -m 120 -w "\n%{http_code}" -X POST "${TARGET_URL}/v1/images/generations" \
413
+ -H "Content-Type: application/json" \
414
+ -d "${TEST_PAYLOAD}" 2>&1); then
415
+ echo "❌ Inference request failed: Could not connect to local container"
416
+ exit 1
417
+ fi
418
+
419
+ HTTP_CODE=$(echo "${INVOKE_RESPONSE}" | tail -n1)
420
+ RESPONSE_BODY=$(echo "${INVOKE_RESPONSE}" | sed '$d')
421
+
422
+ if [ "${HTTP_CODE}" = "200" ]; then
423
+ if echo "${RESPONSE_BODY}" | python3 -c "
424
+ import sys, json
425
+ resp = json.load(sys.stdin)
426
+ assert 'data' in resp, 'Missing data array'
427
+ assert len(resp['data']) > 0, 'Empty data array'
428
+ assert 'b64_json' in resp['data'][0], 'Missing b64_json in data entry'
429
+ " 2>/dev/null; then
430
+ echo "✅ Image generation successful (HTTP ${HTTP_CODE})"
431
+ echo " Response contains valid data array with b64_json image"
432
+
433
+ # Save generated image to file
434
+ OUTPUT_IMAGE="${SCRIPT_DIR}/../test_output.png"
435
+ if echo "${RESPONSE_BODY}" | python3 -c "
436
+ import sys, json, base64
437
+ resp = json.load(sys.stdin)
438
+ img_data = base64.b64decode(resp['data'][0]['b64_json'])
439
+ with open('${OUTPUT_IMAGE}', 'wb') as f:
440
+ f.write(img_data)
441
+ " 2>/dev/null; then
442
+ echo " 🖼️ Image saved to: ${OUTPUT_IMAGE}"
443
+ fi
444
+ else
445
+ echo "⚠️ Image generation returned HTTP ${HTTP_CODE} but response format unexpected"
446
+ echo " Response preview: ${RESPONSE_BODY:0:200}"
447
+ fi
448
+ else
449
+ echo "❌ Image generation failed (HTTP ${HTTP_CODE})"
450
+ echo " Response: ${RESPONSE_BODY}"
451
+ exit 1
452
+ fi
453
+ else
454
+ echo " Sending POST request to ${TARGET_URL}/invocations"
455
+
456
+ if ! INVOKE_RESPONSE=$(curl -s -w "\n%{http_code}" -X POST "${TARGET_URL}/invocations" \
457
+ -H "Content-Type: application/json" \
458
+ -d "${TEST_PAYLOAD}" 2>&1); then
459
+ echo "❌ Inference request failed: Could not connect to local container"
460
+ exit 1
461
+ fi
462
+
463
+ HTTP_CODE=$(echo "${INVOKE_RESPONSE}" | tail -n1)
464
+ RESPONSE_BODY=$(echo "${INVOKE_RESPONSE}" | sed '$d')
465
+
466
+ if [ "${HTTP_CODE}" = "200" ]; then
467
+ echo "✅ Inference request successful (HTTP ${HTTP_CODE})"
468
+ echo " Response preview: ${RESPONSE_BODY:0:200}"
469
+ if [ ${#RESPONSE_BODY} -gt 200 ]; then
470
+ echo " (truncated, full response is ${#RESPONSE_BODY} characters)"
471
+ fi
472
+ else
473
+ echo "❌ Inference request failed (HTTP ${HTTP_CODE})"
474
+ echo " Response: ${RESPONSE_BODY}"
475
+ exit 1
476
+ fi
477
+ fi
478
+ else
479
+ # SageMaker async invocation: upload payload to S3, invoke async, poll for result
480
+ echo " Uploading test payload to S3..."
481
+
482
+ # Create temporary file for payload
483
+ TEMP_PAYLOAD=$(mktemp)
484
+ echo "${TEST_PAYLOAD}" > "${TEMP_PAYLOAD}"
485
+
486
+ # Upload payload to S3 input location
487
+ ASYNC_INPUT_KEY="${PROJECT_NAME}/input/test-payload-$(date +%s).json"
488
+ ASYNC_S3_BUCKET=$(echo "${ASYNC_S3_OUTPUT_PATH}" | sed 's|s3://||' | cut -d'/' -f1)
489
+ S3_INPUT_LOCATION="s3://${ASYNC_S3_BUCKET}/${ASYNC_INPUT_KEY}"
490
+
491
+ if ! aws s3 cp "${TEMP_PAYLOAD}" "${S3_INPUT_LOCATION}" --region "${AWS_REGION}" &> /dev/null; then
492
+ echo "❌ Failed to upload test payload to S3"
493
+ echo " Location: ${S3_INPUT_LOCATION}"
494
+ echo " Check that your IAM credentials have s3:PutObject permission"
495
+ rm -f "${TEMP_PAYLOAD}"
496
+ exit 1
497
+ fi
498
+ rm -f "${TEMP_PAYLOAD}"
499
+ echo "✅ Test payload uploaded to: ${S3_INPUT_LOCATION}"
500
+
501
+ # Invoke endpoint asynchronously (no inference components for async)
502
+ echo " Invoking async endpoint..."
503
+
504
+ INVOKE_ARGS=(
505
+ --endpoint-name "${ENDPOINT_NAME}"
506
+ --input-location "${S3_INPUT_LOCATION}"
507
+ --region "${AWS_REGION}"
508
+ --content-type "application/json"
509
+ )
510
+
511
+ if ! INVOKE_RESULT=$(aws sagemaker-runtime invoke-endpoint-async \
512
+ "${INVOKE_ARGS[@]}" 2>&1); then
513
+ echo "❌ Async invocation failed"
514
+ echo " Error: ${INVOKE_RESULT}"
515
+ exit 1
516
+ fi
517
+
518
+ # Extract output location from response
519
+ OUTPUT_LOCATION=$(echo "${INVOKE_RESULT}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('OutputLocation',''))" 2>/dev/null || echo "")
520
+
521
+ if [ -z "${OUTPUT_LOCATION}" ]; then
522
+ echo "⚠️ Async invocation accepted but no output location returned"
523
+ echo " Check the S3 output path for results: ${ASYNC_S3_OUTPUT_PATH}"
524
+ echo ""
525
+ echo "✅ Async invocation submitted successfully"
526
+ else
527
+ echo "✅ Async invocation accepted"
528
+ echo " Output location: ${OUTPUT_LOCATION}"
529
+
530
+ # Poll S3 output location for result
531
+ POLL_TIMEOUT=300
532
+ POLL_INTERVAL=10
533
+ ELAPSED=0
534
+
535
+ echo "⏳ Polling for async result (timeout: ${POLL_TIMEOUT}s)..."
536
+
537
+ while [ ${ELAPSED} -lt ${POLL_TIMEOUT} ]; do
538
+ if aws s3 ls "${OUTPUT_LOCATION}" --region "${AWS_REGION}" &> /dev/null; then
539
+ echo "✅ Async inference result available"
540
+
541
+ # Download and display result
542
+ TEMP_RESULT=$(mktemp)
543
+ if aws s3 cp "${OUTPUT_LOCATION}" "${TEMP_RESULT}" --region "${AWS_REGION}" &> /dev/null; then
544
+ RESPONSE_BODY=$(cat "${TEMP_RESULT}")
545
+ rm -f "${TEMP_RESULT}"
546
+
547
+ echo " Response preview: ${RESPONSE_BODY:0:200}"
548
+ if [ ${#RESPONSE_BODY} -gt 200 ]; then
549
+ echo " (truncated, full response is ${#RESPONSE_BODY} characters)"
550
+ fi
551
+
552
+ # For diffusors, extract and save the generated image
553
+ if [ "${FRAMEWORK}" = "diffusors" ]; then
554
+ OUTPUT_IMAGE="${SCRIPT_DIR}/../test_output.png"
555
+ if echo "${RESPONSE_BODY}" | python3 -c "
556
+ import sys, json, base64
557
+ resp = json.load(sys.stdin)
558
+ if 'data' in resp and len(resp['data']) > 0 and 'b64_json' in resp['data'][0]:
559
+ img_data = base64.b64decode(resp['data'][0]['b64_json'])
560
+ with open('${OUTPUT_IMAGE}', 'wb') as f:
561
+ f.write(img_data)
562
+ print('ok')
563
+ else:
564
+ print('skip')
565
+ " 2>/dev/null | grep -q "ok"; then
566
+ echo " 🖼️ Image saved to: ${OUTPUT_IMAGE}"
567
+ fi
568
+ fi
569
+ else
570
+ rm -f "${TEMP_RESULT}"
571
+ echo "⚠️ Result exists but could not be downloaded"
572
+ fi
573
+ break
574
+ fi
575
+
576
+ sleep ${POLL_INTERVAL}
577
+ ELAPSED=$((ELAPSED + POLL_INTERVAL))
578
+ echo " ⏳ Waiting... (${ELAPSED}s / ${POLL_TIMEOUT}s)"
579
+ done
580
+
581
+ if [ ${ELAPSED} -ge ${POLL_TIMEOUT} ]; then
582
+ echo "❌ Async inference timed out after ${POLL_TIMEOUT}s"
583
+ echo ""
584
+ echo " The request may still be processing. Check:"
585
+ echo " • S3 output path: ${OUTPUT_LOCATION}"
586
+ echo " • CloudWatch Logs: /aws/sagemaker/Endpoints/${ENDPOINT_NAME}"
587
+ echo " • Endpoint status: aws sagemaker describe-endpoint --endpoint-name ${ENDPOINT_NAME} --region ${AWS_REGION}"
588
+ exit 1
589
+ fi
590
+ fi
591
+ fi
592
+
593
+ echo ""
594
+ echo "✅ All tests passed!"
595
+ echo ""
596
+
597
+ if [ "${TEST_MODE}" = "local" ]; then
598
+ echo "Next steps:"
599
+ echo " • Push to ECR: ./do/push"
600
+ echo " • Deploy to SageMaker: ./do/deploy"
601
+ else
602
+ echo "Async endpoint is ready for production use!"
603
+ echo " • Endpoint name: ${ENDPOINT_NAME}"
604
+ echo " • Region: ${AWS_REGION}"
605
+ echo " • S3 output: ${ASYNC_S3_OUTPUT_PATH}"
606
+ echo ""
607
+ echo "📝 Register this deployment:"
608
+ echo " ./do/register"
609
+ fi
610
+
611
+ <% } else if (deploymentTarget === 'hyperpod-eks') { %>
612
# ============================================================
# HyperPod EKS Testing
# ============================================================

# Parse arguments: local or hyperpod test mode
# Default to hyperpod if no argument given (deployment target is hyperpod-eks)
TEST_TARGET="${1:-hyperpod}"

case "${TEST_TARGET}" in
  local)
    echo "🧪 Testing local container at localhost:8080"
    echo " Project: ${PROJECT_NAME}"
    echo " Framework: ${FRAMEWORK}"
    echo " Model server: ${MODEL_SERVER}"
    TARGET_URL="http://localhost:8080"
    ;;
  hyperpod)
    echo "🧪 Testing HyperPod EKS deployment"
    echo " Project: ${PROJECT_NAME}"
    echo " Framework: ${FRAMEWORK}"
    echo " Model server: ${MODEL_SERVER}"
    echo " Cluster: ${HYPERPOD_CLUSTER_NAME}"
    echo " Namespace: ${HYPERPOD_NAMESPACE}"
    echo " Region: ${AWS_REGION}"
    echo ""

    # Resolve the EKS cluster backing the HyperPod cluster and write a
    # dedicated kubeconfig so the user's default context is not clobbered.
    echo "🔑 Configuring kubectl for HyperPod cluster..."
    KUBECONFIG_PATH="${HOME}/.kube/hyperpod-${HYPERPOD_CLUSTER_NAME}"

    EKS_CLUSTER_ARN=$(aws sagemaker describe-cluster \
      --cluster-name "${HYPERPOD_CLUSTER_NAME}" \
      --region "${AWS_REGION}" \
      --query "Orchestrator.Eks.ClusterArn" \
      --output text 2>&1) || {
      echo "❌ Failed to describe HyperPod cluster: ${HYPERPOD_CLUSTER_NAME}"
      echo ""
      echo " Check that:"
      echo " • The cluster name is correct"
      echo " • The cluster exists in region: ${AWS_REGION}"
      echo " • Your IAM user/role has permission to access the cluster"
      exit 4
    }

    # The EKS cluster name is the final path segment of the ARN; use
    # parameter expansion instead of spawning awk for the split.
    EKS_CLUSTER_NAME="${EKS_CLUSTER_ARN##*/}"

    if ! aws eks update-kubeconfig \
      --name "${EKS_CLUSTER_NAME}" \
      --region "${AWS_REGION}" \
      --kubeconfig "${KUBECONFIG_PATH}" 2>&1; then
      echo "❌ Failed to configure kubectl for EKS cluster: ${EKS_CLUSTER_NAME}"
      exit 4
    fi

    export KUBECONFIG="${KUBECONFIG_PATH}"

    # Verify cluster connectivity
    if ! kubectl cluster-info &> /dev/null; then
      echo "❌ Cannot connect to HyperPod cluster"
      exit 4
    fi
    echo "✅ Connected to HyperPod cluster"

    # Port-forward the service to a local port
    LOCAL_PORT=8080
    echo ""
    echo "🔌 Port-forwarding svc/${PROJECT_NAME} to localhost:${LOCAL_PORT}..."
    kubectl port-forward "svc/${PROJECT_NAME}" "${LOCAL_PORT}:8080" \
      -n "${HYPERPOD_NAMESPACE}" &
    PF_PID=$!

    # Register cleanup immediately (before the settle wait), so an
    # interrupt during the sleep still reaps the background port-forward.
    # PF_PID is expanded now, at trap-definition time, intentionally.
    trap "kill ${PF_PID} 2>/dev/null || true" EXIT

    # Wait for port-forward to establish
    sleep 3

    # kill -0 probes for process existence without sending a signal.
    if ! kill -0 "${PF_PID}" 2>/dev/null; then
      echo "❌ Port-forward failed to start"
      echo ""
      echo " Check that:"
      echo " • The service exists: kubectl get svc ${PROJECT_NAME} -n ${HYPERPOD_NAMESPACE}"
      echo " • The deployment is running: kubectl get pods -n ${HYPERPOD_NAMESPACE}"
      exit 1
    fi
    echo "✅ Port-forward established"

    TARGET_URL="http://localhost:${LOCAL_PORT}"
    ;;
  *)
    echo "Usage: ./do/test [local|hyperpod]"
    echo ""
    echo "Test modes:"
    echo " local - Test local container at localhost:8080"
    echo " hyperpod - Test HyperPod EKS deployment via port-forward"
    exit 1
    ;;
esac
711
+
712
echo ""

# Test 1: Health check (/ping)
echo "🔍 Test 1: Health check"
echo " Sending GET request to ${TARGET_URL}/ping"

# curl -w appends the HTTP status code as an extra final line after the
# response body; curl itself only returns non-zero on transport errors
# (connection refused, timeout), not on HTTP error statuses.
if ! PING_RESPONSE=$(curl -s -w "\n%{http_code}" -X GET "${TARGET_URL}/ping" 2>&1); then
  echo "❌ Health check failed: Could not connect"
  if [ "${TEST_TARGET}" = "local" ]; then
    echo " Make sure the container is running: ./do/run"
  else
    echo " Check that the port-forward is working and pods are running"
  fi
  exit 1
fi

# Split the combined output: last line = status code, everything before = body.
HTTP_CODE=$(echo "${PING_RESPONSE}" | tail -n1)
RESPONSE_BODY=$(echo "${PING_RESPONSE}" | sed '$d')

# Readiness is signalled by HTTP 200 on /ping; anything else fails the test.
if [ "${HTTP_CODE}" = "200" ]; then
  echo "✅ Health check passed (HTTP ${HTTP_CODE})"
else
  echo "❌ Health check failed (HTTP ${HTTP_CODE})"
  echo " Response: ${RESPONSE_BODY}"
  exit 1
fi
738
+
739
echo ""

# Test 2: Inference request (/invocations)
echo "🔍 Test 2: Inference request"

# Create framework-specific test payload
case "${FRAMEWORK}" in
  sklearn|xgboost|tensorflow)
    # Traditional ML and TensorFlow share the same JSON "instances" payload
    # (the arms were byte-identical, so they are merged here).
    TEST_PAYLOAD='{"instances": [[1.0, 2.0, 3.0, 4.0]]}'
    echo " Payload: Sample feature vector"
    ;;
  transformers)
    # Transformers: payload format depends on model server
    case "${MODEL_SERVER}" in
      vllm|sglang)
        # OpenAI-compatible chat completions format. When the model comes
        # from a mounted path (JumpStart/S3/local), the request targets
        # /opt/ml/model instead of the original model identifier.
        VLLM_MODEL_NAME="${MODEL_NAME}"
        if [[ "${MODEL_NAME}" == jumpstart://* ]] || [[ "${MODEL_NAME}" == jumpstart-hub://* ]] || [[ "${MODEL_NAME}" == s3://* ]] || [[ "${MODEL_NAME}" == /opt/ml/* ]]; then
          VLLM_MODEL_NAME="/opt/ml/model"
        fi
        TEST_PAYLOAD='{"model": "'"${VLLM_MODEL_NAME}"'", "messages": [{"role": "user", "content": "What is machine learning?"}], "max_tokens": 50, "temperature": 0.7}'
        echo " Payload: OpenAI-compatible chat completion request"
        echo " Model: ${VLLM_MODEL_NAME}"
        ;;
      *)
        # HuggingFace-style format for LMI, DJL, TensorRT-LLM
        TEST_PAYLOAD='{"inputs": "What is machine learning?", "parameters": {"max_new_tokens": 50, "temperature": 0.7}}'
        echo " Payload: HuggingFace-style text generation request"
        ;;
    esac
    ;;
  diffusors)
    # Diffusors: OpenAI DALL-E compatible image generation request
    TEST_PAYLOAD='{"prompt": "A white cat", "n": 1, "size": "512x512"}'
    echo " Payload: OpenAI DALL-E compatible image generation request"
    ;;
  *)
    echo "❌ Unknown framework: ${FRAMEWORK}"
    exit 3
    ;;
esac
786
+
787
if [ "${FRAMEWORK}" = "diffusors" ]; then
  # Image generation targets the OpenAI-style endpoint, not /invocations;
  # -m 120 allows up to two minutes since diffusion inference is slow.
  echo " Sending POST request to ${TARGET_URL}/v1/images/generations"

  if ! INVOKE_RESPONSE=$(curl -s -m 120 -w "\n%{http_code}" -X POST "${TARGET_URL}/v1/images/generations" \
    -H "Content-Type: application/json" \
    -d "${TEST_PAYLOAD}" 2>&1); then
    echo "❌ Inference request failed: Could not connect"
    exit 1
  fi

  # Split combined curl output: last line = status code (-w), rest = body.
  HTTP_CODE=$(echo "${INVOKE_RESPONSE}" | tail -n1)
  RESPONSE_BODY=$(echo "${INVOKE_RESPONSE}" | sed '$d')

  if [ "${HTTP_CODE}" = "200" ]; then
    # Validate response contains data array with b64_json
    if echo "${RESPONSE_BODY}" | python3 -c "
import sys, json
resp = json.load(sys.stdin)
assert 'data' in resp, 'Missing data array'
assert len(resp['data']) > 0, 'Empty data array'
assert 'b64_json' in resp['data'][0], 'Missing b64_json in data entry'
" 2>/dev/null; then
      echo "✅ Image generation successful (HTTP ${HTTP_CODE})"
      echo " Response contains valid data array with b64_json image"

      # Save generated image to file.
      # NOTE(review): OUTPUT_IMAGE is interpolated into the Python source
      # below; a path containing a single quote would break the snippet —
      # confirm SCRIPT_DIR can never contain quotes.
      OUTPUT_IMAGE="${SCRIPT_DIR}/../test_output.png"
      if echo "${RESPONSE_BODY}" | python3 -c "
import sys, json, base64
resp = json.load(sys.stdin)
img_data = base64.b64decode(resp['data'][0]['b64_json'])
with open('${OUTPUT_IMAGE}', 'wb') as f:
    f.write(img_data)
" 2>/dev/null; then
        echo " 🖼️ Image saved to: ${OUTPUT_IMAGE}"
      fi
    else
      # 200 but schema validation failed — warn, do not fail the run.
      echo "⚠️ Image generation returned HTTP ${HTTP_CODE} but response format unexpected"
      echo " Response preview: ${RESPONSE_BODY:0:200}"
    fi
  else
    echo "❌ Image generation failed (HTTP ${HTTP_CODE})"
    echo " Response: ${RESPONSE_BODY}"
    exit 1
  fi
else
  # All non-diffusors frameworks use the standard /invocations endpoint.
  echo " Sending POST request to ${TARGET_URL}/invocations"

  if ! INVOKE_RESPONSE=$(curl -s -w "\n%{http_code}" -X POST "${TARGET_URL}/invocations" \
    -H "Content-Type: application/json" \
    -d "${TEST_PAYLOAD}" 2>&1); then
    echo "❌ Inference request failed: Could not connect"
    exit 1
  fi

  HTTP_CODE=$(echo "${INVOKE_RESPONSE}" | tail -n1)
  RESPONSE_BODY=$(echo "${INVOKE_RESPONSE}" | sed '$d')

  if [ "${HTTP_CODE}" = "200" ]; then
    echo "✅ Inference request successful (HTTP ${HTTP_CODE})"
    # ${RESPONSE_BODY:0:200} — show only the first 200 characters.
    echo " Response preview: ${RESPONSE_BODY:0:200}"
    if [ ${#RESPONSE_BODY} -gt 200 ]; then
      echo " (truncated, full response is ${#RESPONSE_BODY} characters)"
    fi
  else
    echo "❌ Inference request failed (HTTP ${HTTP_CODE})"
    echo " Response: ${RESPONSE_BODY}"
    exit 1
  fi
fi
857
+
858
echo ""
echo "✅ All tests passed!"
echo ""

# Closing summary — the suggested next steps depend on which target we tested.
case "${TEST_TARGET}" in
  local)
    echo "Next steps:"
    echo " • Push to ECR: ./do/push"
    echo " • Deploy to HyperPod: ./do/deploy"
    ;;
  *)
    echo "HyperPod deployment is ready for production use!"
    echo " • Cluster: ${HYPERPOD_CLUSTER_NAME}"
    echo " • Namespace: ${HYPERPOD_NAMESPACE}"
    echo " • Service: ${PROJECT_NAME}"
    echo ""
    echo "📝 Register this deployment:"
    echo " ./do/register"
    ;;
esac
875
+
876
<% } else if (deploymentTarget === 'batch-transform') { %>
# ============================================================
# SageMaker Managed Inference - Batch Testing
# ============================================================

# Parse arguments: local or batch test mode.
# Default to batch if no argument given (deployment target is batch-transform).
TEST_TARGET="${1:-batch}"
884
+
885
case "${TEST_TARGET}" in
  local)
    echo "🧪 Testing local container at localhost:8080"
    echo " Project: ${PROJECT_NAME}"
    echo " Framework: ${FRAMEWORK}"
    echo " Model server: ${MODEL_SERVER}"
    TARGET_URL="http://localhost:8080"
    ;;
  batch)
    echo "🧪 Checking batch transform job status"
    echo " Project: ${PROJECT_NAME}"
    echo " Framework: ${FRAMEWORK}"
    echo " Region: ${AWS_REGION}"
    echo " S3 input: ${BATCH_INPUT_PATH}"
    echo " S3 output: ${BATCH_OUTPUT_PATH}"
    echo ""

    # Get transform job name from config
    TRANSFORM_JOB_NAME="${TRANSFORM_JOB_NAME:-}"
    if [ -z "${TRANSFORM_JOB_NAME}" ]; then
      echo "❌ No transform job name found"
      echo " Run ./do/deploy first to create a transform job"
      exit 1
    fi

    echo "🔍 Checking transform job: ${TRANSFORM_JOB_NAME}"

    if ! JOB_STATUS_JSON=$(aws sagemaker describe-transform-job \
      --transform-job-name "${TRANSFORM_JOB_NAME}" \
      --region "${AWS_REGION}" 2>&1); then
      echo "❌ Failed to describe transform job"
      echo " Error: ${JOB_STATUS_JSON}"
      exit 1
    fi

    # Fall back to "Unknown" if the JSON cannot be parsed.
    JOB_STATUS=$(echo "${JOB_STATUS_JSON}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('TransformJobStatus','Unknown'))" 2>/dev/null || echo "Unknown")

    case "${JOB_STATUS}" in
      Completed)
        echo "✅ Transform job completed successfully"
        echo ""

        # Download results locally
        LOCAL_OUTPUT_DIR="${SCRIPT_DIR}/../batch-output"
        mkdir -p "${LOCAL_OUTPUT_DIR}"
        echo "📥 Downloading results to ${LOCAL_OUTPUT_DIR}/"
        if aws s3 sync "${BATCH_OUTPUT_PATH}" "${LOCAL_OUTPUT_DIR}/" --region "${AWS_REGION}"; then
          # Enumerate with a glob instead of parsing `ls` output — safe for
          # filenames with whitespace; nullglob makes an empty dir yield
          # an empty array rather than a literal pattern.
          shopt -s nullglob
          OUTPUT_FILES=("${LOCAL_OUTPUT_DIR}"/*)
          shopt -u nullglob
          DOWNLOADED=${#OUTPUT_FILES[@]}
          echo "✅ Downloaded ${DOWNLOADED} file(s) to ${LOCAL_OUTPUT_DIR}/"
          echo ""

          # Display first output file preview
          if [ "${DOWNLOADED}" -gt 0 ]; then
            FIRST_FILE="${OUTPUT_FILES[0]}"
            echo "📄 Sample output (${FIRST_FILE##*/}):"
            head -5 "${FIRST_FILE}"
            LINES=$(wc -l < "${FIRST_FILE}" | tr -d ' ')
            if [ "${LINES}" -gt 5 ]; then
              echo " ... (${LINES} total lines)"
            fi
          fi
        else
          echo "⚠️ Could not download output files"
        fi

        echo ""
        echo "✅ All tests passed!"
        echo ""
        echo "📝 Register this deployment:"
        echo " ./do/register"
        ;;
      InProgress)
        echo "⏳ Transform job is still in progress"

        # Extract progress details
        CREATION_TIME=$(echo "${JOB_STATUS_JSON}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('CreationTime','Unknown'))" 2>/dev/null || echo "Unknown")
        echo " Started: ${CREATION_TIME}"
        echo " Status: InProgress"
        echo ""
        echo " The job is still running. Check again later:"
        echo " ./do/test"
        echo ""
        echo " View logs:"
        echo " ./do/logs"
        ;;
      Failed)
        echo "❌ Transform job failed"

        FAILURE_REASON=$(echo "${JOB_STATUS_JSON}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('FailureReason','Unknown'))" 2>/dev/null || echo "Unknown")
        echo " Reason: ${FAILURE_REASON}"
        echo ""
        echo " View logs for more details:"
        echo " ./do/logs"
        exit 1
        ;;
      Stopped)
        echo "⚠️ Transform job was stopped"
        echo " The job was manually stopped before completion"
        echo ""
        echo " To start a new job, run:"
        echo " ./do/deploy"
        ;;
      *)
        echo "⚠️ Transform job status: ${JOB_STATUS}"
        echo " Check again later: ./do/test"
        ;;
    esac
    # Batch mode ends here; only local mode falls through to the live
    # endpoint tests below.
    exit 0
    ;;
  *)
    echo "Usage: ./do/test [local|batch]"
    echo ""
    echo "Test modes:"
    echo " local - Test local container at localhost:8080"
    echo " batch - Check transform job status and view results"
    exit 1
    ;;
esac
1003
+
1004
echo ""

# Test 1: Health check (/ping)
echo "🔍 Test 1: Health check"
echo " Sending GET request to ${TARGET_URL}/ping"

# curl -w appends the HTTP status code as an extra final line; curl itself
# only returns non-zero on transport errors, not HTTP error statuses.
if ! PING_RESPONSE=$(curl -s -w "\n%{http_code}" -X GET "${TARGET_URL}/ping" 2>&1); then
  echo "❌ Health check failed: Could not connect to local container"
  echo " Make sure the container is running: ./do/run"
  exit 1
fi

# Split combined output: last line = status code, rest = response body.
HTTP_CODE=$(echo "${PING_RESPONSE}" | tail -n1)
RESPONSE_BODY=$(echo "${PING_RESPONSE}" | sed '$d')

# Readiness is signalled by HTTP 200 on /ping.
if [ "${HTTP_CODE}" = "200" ]; then
  echo "✅ Health check passed (HTTP ${HTTP_CODE})"
else
  echo "❌ Health check failed (HTTP ${HTTP_CODE})"
  echo " Response: ${RESPONSE_BODY}"
  exit 1
fi
1026
+
1027
echo ""

# Test 2: Inference request (/invocations)
echo "🔍 Test 2: Inference request"

# Create framework-specific test payload
case "${FRAMEWORK}" in
  sklearn|xgboost|tensorflow)
    # Traditional ML and TensorFlow share the same JSON "instances" payload
    # (the arms were byte-identical, so they are merged here).
    TEST_PAYLOAD='{"instances": [[1.0, 2.0, 3.0, 4.0]]}'
    echo " Payload: Sample feature vector"
    ;;
  transformers)
    # Transformers: payload format depends on model server
    case "${MODEL_SERVER}" in
      vllm|sglang)
        # OpenAI-compatible chat completions format. When the model comes
        # from a mounted path (JumpStart/S3/local), the request targets
        # /opt/ml/model instead of the original model identifier.
        VLLM_MODEL_NAME="${MODEL_NAME}"
        if [[ "${MODEL_NAME}" == jumpstart://* ]] || [[ "${MODEL_NAME}" == jumpstart-hub://* ]] || [[ "${MODEL_NAME}" == s3://* ]] || [[ "${MODEL_NAME}" == /opt/ml/* ]]; then
          VLLM_MODEL_NAME="/opt/ml/model"
        fi
        TEST_PAYLOAD='{"model": "'"${VLLM_MODEL_NAME}"'", "messages": [{"role": "user", "content": "What is machine learning?"}], "max_tokens": 50, "temperature": 0.7}'
        echo " Payload: OpenAI-compatible chat completion request"
        echo " Model: ${VLLM_MODEL_NAME}"
        ;;
      *)
        # HuggingFace-style format for LMI, DJL, TensorRT-LLM
        TEST_PAYLOAD='{"inputs": "What is machine learning?", "parameters": {"max_new_tokens": 50, "temperature": 0.7}}'
        echo " Payload: HuggingFace-style text generation request"
        ;;
    esac
    ;;
  diffusors)
    # Diffusors: OpenAI DALL-E compatible image generation request
    TEST_PAYLOAD='{"prompt": "A white cat", "n": 1, "size": "512x512"}'
    echo " Payload: OpenAI DALL-E compatible image generation request"
    ;;
  *)
    echo "❌ Unknown framework: ${FRAMEWORK}"
    exit 3
    ;;
esac
1068
+
1069
if [ "${FRAMEWORK}" = "diffusors" ]; then
  # Image generation targets the OpenAI-style endpoint, not /invocations;
  # -m 120 allows up to two minutes since diffusion inference is slow.
  echo " Sending POST request to ${TARGET_URL}/v1/images/generations"

  if ! INVOKE_RESPONSE=$(curl -s -m 120 -w "\n%{http_code}" -X POST "${TARGET_URL}/v1/images/generations" \
    -H "Content-Type: application/json" \
    -d "${TEST_PAYLOAD}" 2>&1); then
    echo "❌ Inference request failed: Could not connect to local container"
    exit 1
  fi

  # Split combined curl output: last line = status code (-w), rest = body.
  HTTP_CODE=$(echo "${INVOKE_RESPONSE}" | tail -n1)
  RESPONSE_BODY=$(echo "${INVOKE_RESPONSE}" | sed '$d')

  if [ "${HTTP_CODE}" = "200" ]; then
    # Validate the response contains a data array with a b64_json entry.
    if echo "${RESPONSE_BODY}" | python3 -c "
import sys, json
resp = json.load(sys.stdin)
assert 'data' in resp, 'Missing data array'
assert len(resp['data']) > 0, 'Empty data array'
assert 'b64_json' in resp['data'][0], 'Missing b64_json in data entry'
" 2>/dev/null; then
      echo "✅ Image generation successful (HTTP ${HTTP_CODE})"
      echo " Response contains valid data array with b64_json image"

      # Save generated image to file.
      # NOTE(review): OUTPUT_IMAGE is interpolated into the Python source
      # below; a path containing a single quote would break the snippet —
      # confirm SCRIPT_DIR can never contain quotes.
      OUTPUT_IMAGE="${SCRIPT_DIR}/../test_output.png"
      if echo "${RESPONSE_BODY}" | python3 -c "
import sys, json, base64
resp = json.load(sys.stdin)
img_data = base64.b64decode(resp['data'][0]['b64_json'])
with open('${OUTPUT_IMAGE}', 'wb') as f:
    f.write(img_data)
" 2>/dev/null; then
        echo " 🖼️ Image saved to: ${OUTPUT_IMAGE}"
      fi
    else
      # 200 but schema validation failed — warn, do not fail the run.
      echo "⚠️ Image generation returned HTTP ${HTTP_CODE} but response format unexpected"
      echo " Response preview: ${RESPONSE_BODY:0:200}"
    fi
  else
    echo "❌ Image generation failed (HTTP ${HTTP_CODE})"
    echo " Response: ${RESPONSE_BODY}"
    exit 1
  fi
else
  # All non-diffusors frameworks use the standard /invocations endpoint.
  echo " Sending POST request to ${TARGET_URL}/invocations"

  if ! INVOKE_RESPONSE=$(curl -s -w "\n%{http_code}" -X POST "${TARGET_URL}/invocations" \
    -H "Content-Type: application/json" \
    -d "${TEST_PAYLOAD}" 2>&1); then
    echo "❌ Inference request failed: Could not connect to local container"
    exit 1
  fi

  HTTP_CODE=$(echo "${INVOKE_RESPONSE}" | tail -n1)
  RESPONSE_BODY=$(echo "${INVOKE_RESPONSE}" | sed '$d')

  if [ "${HTTP_CODE}" = "200" ]; then
    echo "✅ Inference request successful (HTTP ${HTTP_CODE})"
    # ${RESPONSE_BODY:0:200} — show only the first 200 characters.
    echo " Response preview: ${RESPONSE_BODY:0:200}"
    if [ ${#RESPONSE_BODY} -gt 200 ]; then
      echo " (truncated, full response is ${#RESPONSE_BODY} characters)"
    fi
  else
    echo "❌ Inference request failed (HTTP ${HTTP_CODE})"
    echo " Response: ${RESPONSE_BODY}"
    exit 1
  fi
fi
1138
+
1139
echo ""
echo "✅ All tests passed!"
echo ""

# Local container validated; the remaining steps run against AWS.
echo "Next steps:"
echo " • Push to ECR: ./do/push"
echo " • Deploy batch transform: ./do/deploy"

<% } %>