@aws/ml-container-creator 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +5 -2
- package/infra/ci-harness/buildspec.yml +60 -0
- package/package.json +1 -1
- package/servers/README.md +41 -1
- package/servers/instance-sizer/index.js +6 -0
- package/src/app.js +33 -2
- package/src/lib/config-manager.js +40 -1
- package/src/lib/deployment-entry-schema.js +16 -0
- package/src/lib/prompt-runner.js +174 -3
- package/src/lib/prompts.js +222 -2
- package/src/lib/registry-command-handler.js +12 -0
- package/templates/Dockerfile +12 -0
- package/templates/code/serving.properties +14 -0
- package/templates/do/adapter +1214 -0
- package/templates/do/adapters/.gitkeep +2 -0
- package/templates/do/add-ic +130 -0
- package/templates/do/benchmark +81 -9
- package/templates/do/clean +507 -17
- package/templates/do/config +23 -1
- package/templates/do/deploy +513 -367
- package/templates/do/ic/default.conf +32 -0
- package/templates/do/lib/endpoint-config.sh +216 -0
- package/templates/do/lib/inference-component.sh +167 -0
- package/templates/do/lib/secrets.sh +44 -0
- package/templates/do/lib/wait.sh +131 -0
- package/templates/do/logs +107 -27
- package/templates/do/optimize +528 -0
- package/templates/do/register +111 -1
- package/templates/do/status +337 -0
- package/templates/do/test +80 -28
|
@@ -0,0 +1,337 @@
|
|
|
1
|
+
#!/bin/bash
|
|
2
|
+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
# Strict mode: stop on command failure, on use of an unset variable, and when
# any stage of a pipeline fails.
set -euo pipefail

# Resolve the directory holding this script and load the config file next to it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/config"

# ============================================================
# SageMaker Real-Time Inference Status
# ============================================================

# Exit with status 4 when the AWS CLI cannot resolve a caller identity.
aws sts get-caller-identity &> /dev/null || {
    echo "❌ AWS credentials not configured"
    echo " Run: aws configure"
    exit 4
}

# ENDPOINT_NAME must be non-empty; otherwise there is nothing to query.
[ -n "${ENDPOINT_NAME:-}" ] || {
    echo "❌ No endpoint configured"
    echo " Run ./do/deploy first to create an endpoint."
    exit 1
}

# ============================================================
# Describe Endpoint
# ============================================================
# Any failure of the describe call (stderr is discarded) is reported as
# "not found" and ends the script with status 1.
if ! ENDPOINT_JSON=$(aws sagemaker describe-endpoint \
    --endpoint-name "${ENDPOINT_NAME}" \
    --region "${AWS_REGION}" 2>/dev/null); then
    echo "❌ Endpoint not found: ${ENDPOINT_NAME}"
    echo " The endpoint may have been deleted. Run ./do/deploy to create a new one."
    exit 1
fi
|
|
41
|
+
|
|
42
|
+
# ------------------------------------------------------------
# Extract endpoint fields from the DescribeEndpoint response
# ------------------------------------------------------------
# This script runs with `set -e` and `set -o pipefail`, so a bare
# `grep -o ... | head | cut` pipeline that finds no match would abort the
# whole script before any fallback below could run. The helpers swallow the
# "no match" status and also accept whitespace around ':' so that both
# compact and pretty-printed JSON are handled.

# Print the first string value stored under a JSON key.
# Arguments: $1 - JSON text, $2 - key name
# Outputs:   the value on stdout; nothing when the key is absent
_ep_json_string() {
    local json="$1" key="$2" hit
    hit=$(printf '%s\n' "${json}" \
        | grep -oE "\"${key}\"[[:space:]]*:[[:space:]]*\"[^\"]*\"" \
        | head -1) || true
    [ -n "${hit}" ] || return 0
    # Splitting on '"' yields: <empty>, key, separator, value.
    printf '%s\n' "${hit}" | cut -d'"' -f4
}

# Print the first unsigned integer value stored under a JSON key.
# Arguments: $1 - JSON text, $2 - key name
# Outputs:   the digits on stdout; an empty line when the key is absent
_ep_json_number() {
    local json="$1" key="$2" hit
    hit=$(printf '%s\n' "${json}" \
        | grep -oE "\"${key}\"[[:space:]]*:[[:space:]]*[0-9]+" \
        | head -1) || true
    hit="${hit##*:}"
    printf '%s\n' "${hit//[[:space:]]/}"
}

EP_STATUS=$(_ep_json_string "${ENDPOINT_JSON}" "EndpointStatus")
EP_INSTANCE_TYPE=$(_ep_json_string "${ENDPOINT_JSON}" "InstanceType")
EP_INSTANCE_COUNT=$(_ep_json_number "${ENDPOINT_JSON}" "CurrentInstanceCount")

# Fallback for instance count if not available
if [ -z "${EP_INSTANCE_COUNT}" ]; then
    EP_INSTANCE_COUNT=$(_ep_json_number "${ENDPOINT_JSON}" "InitialInstanceCount")
fi
EP_INSTANCE_COUNT="${EP_INSTANCE_COUNT:-1}"

# Use INSTANCE_TYPE from config as fallback if not in describe response
EP_INSTANCE_TYPE="${EP_INSTANCE_TYPE:-${INSTANCE_TYPE:-unknown}}"
|
|
54
|
+
|
|
55
|
+
# GPU count lookup for the instance type
|
|
56
|
+
# Print the number of GPUs on one instance of the given SageMaker instance type.
# Arguments: $1 - instance type, e.g. "ml.g5.12xlarge"
# Outputs:   the GPU count on stdout; an empty line for types not in the table,
#            which callers treat as "capacity unknown"
_get_instance_gpus() {
    local itype="$1"
    case "${itype}" in
        # --- G4dn ---
        ml.g4dn.xlarge|ml.g4dn.2xlarge|ml.g4dn.4xlarge|ml.g4dn.8xlarge|ml.g4dn.16xlarge) echo 1 ;;
        ml.g4dn.12xlarge) echo 4 ;;
        # --- G5 ---
        ml.g5.xlarge|ml.g5.2xlarge|ml.g5.4xlarge|ml.g5.8xlarge|ml.g5.16xlarge) echo 1 ;;
        ml.g5.12xlarge|ml.g5.24xlarge) echo 4 ;;
        ml.g5.48xlarge) echo 8 ;;
        # --- G6 ---
        ml.g6.xlarge|ml.g6.2xlarge|ml.g6.4xlarge|ml.g6.8xlarge|ml.g6.16xlarge) echo 1 ;;
        ml.g6.12xlarge|ml.g6.24xlarge) echo 4 ;;
        ml.g6.48xlarge) echo 8 ;;
        # --- G6e ---
        ml.g6e.xlarge|ml.g6e.2xlarge|ml.g6e.4xlarge|ml.g6e.8xlarge|ml.g6e.16xlarge) echo 1 ;;
        ml.g6e.12xlarge|ml.g6e.24xlarge) echo 4 ;;
        ml.g6e.48xlarge) echo 8 ;;
        # --- G7e (entries kept exactly as previously listed) ---
        ml.g7e.xlarge|ml.g7e.2xlarge|ml.g7e.4xlarge|ml.g7e.8xlarge) echo 1 ;;
        ml.g7e.12xlarge) echo 4 ;;
        ml.g7e.48xlarge) echo 8 ;;
        # --- P3 / P3dn ---
        ml.p3.2xlarge) echo 1 ;;
        ml.p3.8xlarge) echo 4 ;;
        ml.p3.16xlarge|ml.p3dn.24xlarge) echo 8 ;;
        # --- P4 / P5 ---
        ml.p4d.24xlarge|ml.p4de.24xlarge) echo 8 ;;
        ml.p5.48xlarge|ml.p5e.48xlarge|ml.p5en.48xlarge) echo 8 ;;
        # Unknown type: empty output, so no GPU capacity is reported.
        *) echo "" ;;
    esac
}
|
|
91
|
+
|
|
92
|
+
# GPUs on one instance of the endpoint's type (empty when the type is unknown).
INSTANCE_GPUS=$(_get_instance_gpus "${EP_INSTANCE_TYPE}")

# Endpoint-wide GPU capacity = per-instance GPUs x instance count.
# Left empty when the per-instance figure is unknown.
TOTAL_GPUS=""
[ -z "${INSTANCE_GPUS}" ] || TOTAL_GPUS=$(( INSTANCE_GPUS * EP_INSTANCE_COUNT ))

# ============================================================
# Detect Instance Pools
# ============================================================
# The endpoint is treated as pool-based when the describe response mentions
# an "InstancePools" key anywhere.
HAS_INSTANCE_POOLS=false
case "${ENDPOINT_JSON}" in
    *'"InstancePools"'*) HAS_INSTANCE_POOLS=true ;;
esac
|
|
105
|
+
|
|
106
|
+
# ============================================================
|
|
107
|
+
# Print Endpoint Status
|
|
108
|
+
# ============================================================
|
|
109
|
+
# ============================================================
# Print Endpoint Status
# ============================================================

# Print one line per instance pool found in a DescribeEndpoint response.
# Arguments: $1 - DescribeEndpoint JSON (compact or pretty-printed)
# Outputs:   " Priority N: <type> (<count> instances)" per pool; pools with a
#            count greater than zero are tagged "← active". Prints nothing
#            when no InstancePools array is found.
# Notes:     A pool without a Priority falls back to its 1-based position and
#            a pool without a CurrentInstanceCount is shown with 0 instances.
_print_instance_pools() {
    local json="$1" flat pools types priorities counts
    local idx ptype pprio pcount
    local -a types_arr=() priorities_arr=() counts_arr=()

    # grep works line by line, but a pretty-printed array spans several lines:
    # collapse the document to a single line before matching.
    flat=$(printf '%s' "${json}" | tr '\n\r\t' '   ')

    # Every grep below may legitimately find nothing; `|| true` keeps
    # `set -e` / `pipefail` from aborting the script in that case.
    pools=$(printf '%s\n' "${flat}" \
        | grep -oE '"InstancePools"[[:space:]]*:[[:space:]]*\[[^]]*\]' \
        | head -1) || true
    [ -n "${pools}" ] || return 0

    types=$(printf '%s\n' "${pools}" \
        | grep -oE '"InstanceType"[[:space:]]*:[[:space:]]*"[^"]+"' \
        | cut -d'"' -f4) || true
    priorities=$(printf '%s\n' "${pools}" \
        | grep -oE '"Priority"[[:space:]]*:[[:space:]]*[0-9]+' \
        | grep -oE '[0-9]+$') || true
    counts=$(printf '%s\n' "${pools}" \
        | grep -oE '"CurrentInstanceCount"[[:space:]]*:[[:space:]]*[0-9]+' \
        | grep -oE '[0-9]+$') || true

    # Convert the newline-separated lists to arrays. `read -d ''` returns
    # non-zero at end of input, hence `|| true`.
    IFS=$'\n' read -r -d '' -a types_arr <<< "${types}" || true
    IFS=$'\n' read -r -d '' -a priorities_arr <<< "${priorities}" || true
    IFS=$'\n' read -r -d '' -a counts_arr <<< "${counts}" || true

    [ "${#types_arr[@]}" -gt 0 ] || return 0

    for idx in "${!types_arr[@]}"; do
        ptype="${types_arr[$idx]}"
        pprio="${priorities_arr[$idx]:-$((idx+1))}"
        pcount="${counts_arr[$idx]:-0}"

        # Mark pools with instances > 0 as active
        if [ "${pcount}" -gt 0 ] 2>/dev/null; then
            printf " Priority %s: %-20s (%s instances) ← active\n" "${pprio}" "${ptype}" "${pcount}"
        else
            printf " Priority %s: %-20s (%s instances)\n" "${pprio}" "${ptype}" "${pcount}"
        fi
    done
}

echo ""
if [ "${ENDPOINT_EXTERNAL:-false}" = "true" ]; then
    echo "Endpoint: ${ENDPOINT_NAME} (external) [${EP_STATUS}]"
else
    echo "Endpoint: ${ENDPOINT_NAME} [${EP_STATUS}]"
fi

if [ "${HAS_INSTANCE_POOLS}" = "true" ]; then
    # Instance pools path: show per-pool information
    echo "Instance Pools:"
    _print_instance_pools "${ENDPOINT_JSON}"
else
    # Standard single instance type path
    if [ -n "${TOTAL_GPUS}" ]; then
        echo "Instance: ${EP_INSTANCE_TYPE} (${EP_INSTANCE_COUNT} instance, ${TOTAL_GPUS} GPUs)"
    else
        echo "Instance: ${EP_INSTANCE_TYPE} (${EP_INSTANCE_COUNT} instance)"
    fi
fi
echo ""
|
|
162
|
+
|
|
163
|
+
# ============================================================
|
|
164
|
+
# Describe Inference Components
|
|
165
|
+
# ============================================================
|
|
166
|
+
# ============================================================
# Describe Inference Components
# ============================================================
TOTAL_GPU_USED=0
# Table rows, each terminated by a real newline, so they can be emitted with
# printf '%s' (no `echo -e`, no GNU-only `head -n -1`).
IC_ROWS=""

# Print the first string value stored under a JSON key (nothing when absent).
# Arguments: $1 - JSON text, $2 - key name
# `|| true` keeps a "no match" grep from aborting under set -e / pipefail.
_ic_json_string() {
    local json="$1" key="$2" hit
    hit=$(printf '%s\n' "${json}" \
        | grep -oE "\"${key}\"[[:space:]]*:[[:space:]]*\"[^\"]*\"" \
        | head -1) || true
    [ -n "${hit}" ] || return 0
    printf '%s\n' "${hit}" | cut -d'"' -f4
}

# Print the integer part of the first numeric value stored under a JSON key
# (an empty line when absent). A value such as 2.0 yields 2.
# Arguments: $1 - JSON text, $2 - key name
_ic_json_number() {
    local json="$1" key="$2" hit
    hit=$(printf '%s\n' "${json}" \
        | grep -oE "\"${key}\"[[:space:]]*:[[:space:]]*[0-9]+" \
        | head -1) || true
    hit="${hit##*:}"
    printf '%s\n' "${hit//[[:space:]]/}"
}

# Summarize a DescribeInferenceComponent response.
# Arguments: $1 - response JSON (compact or pretty-printed)
# Outputs:   "<status> <gpus-per-copy> <copies>" on one line, defaulting to
#            "Unknown", 0 and 1 respectively. DesiredCopyCount is preferred,
#            CurrentCopyCount is the fallback.
_ic_summary() {
    local json="$1" status gpus copies
    status=$(_ic_json_string "${json}" "InferenceComponentStatus")
    gpus=$(_ic_json_number "${json}" "NumberOfAcceleratorDevicesRequired")
    copies=$(_ic_json_number "${json}" "DesiredCopyCount")
    if [ -z "${copies}" ]; then
        copies=$(_ic_json_number "${json}" "CurrentCopyCount")
    fi
    printf '%s %s %s\n' "${status:-Unknown}" "${gpus:-0}" "${copies:-1}"
}

# Print the separator and the GPU usage footer; the capacity part is shown
# only when TOTAL_GPUS is known.
_print_gpu_total() {
    echo " ─────"
    if [ -n "${TOTAL_GPUS}" ]; then
        printf "Total GPU usage: %s/%s\n" "${TOTAL_GPU_USED}" "${TOTAL_GPUS}"
    else
        printf "Total GPU usage: %s\n" "${TOTAL_GPU_USED}"
    fi
}

if [ -d "${SCRIPT_DIR}/ic" ]; then
    # Multi-IC path: iterate do/ic/*.conf files
    # NOTE: Only base ICs in do/ic/ are counted toward GPU usage.
    # Adapter ICs (do/adapters/*.conf) share the base IC's GPU resources
    # and do not have their own ComputeResourceRequirements, so they are
    # intentionally excluded from GPU capacity calculations.
    HAS_ICS=false

    for conf in "${SCRIPT_DIR}"/ic/*.conf; do
        # An unmatched glob stays literal; skip it.
        [ -f "${conf}" ] || continue
        HAS_ICS=true

        ic_basename=$(basename "${conf}" .conf)

        # Read IC_DEPLOYED_NAME from the conf file without sourcing it.
        ic_deployed_name=""
        if grep -q "^export IC_DEPLOYED_NAME=" "${conf}" 2>/dev/null; then
            ic_deployed_name=$(grep "^export IC_DEPLOYED_NAME=" "${conf}" | sed 's/^export IC_DEPLOYED_NAME="//' | sed 's/"$//')
        fi

        if [ -z "${ic_deployed_name}" ]; then
            IC_ROWS+="$(printf "%-18s %-12s %-6s %-6s" "${ic_basename}" "Not Deployed" "-" "-")"$'\n'
            continue
        fi

        # Call DescribeInferenceComponent; any failure is shown as "Not Found".
        if ! IC_JSON=$(aws sagemaker describe-inference-component \
            --inference-component-name "${ic_deployed_name}" \
            --region "${AWS_REGION}" 2>/dev/null); then
            IC_ROWS+="$(printf "%-18s %-12s %-6s %-6s" "${ic_basename}" "Not Found" "-" "-")"$'\n'
            continue
        fi

        read -r ic_status ic_gpu_count ic_copy_count <<< "$(_ic_summary "${IC_JSON}")"

        # The accelerator requirement applies to each copy, so a component
        # occupies gpus-per-copy x copies devices on the endpoint.
        TOTAL_GPU_USED=$(( TOTAL_GPU_USED + ic_gpu_count * ic_copy_count ))

        IC_ROWS+="$(printf "%-18s %-12s %-6s %-6s" "${ic_basename}" "${ic_status}" "${ic_gpu_count}" "${ic_copy_count}")"$'\n'
    done

    if [ "${HAS_ICS}" = true ]; then
        echo "Inference Components:"
        printf "%-18s %-12s %-6s %-6s\n" "NAME" "STATUS" "GPUs" "COPIES"
        printf '%s' "${IC_ROWS}"
        _print_gpu_total
    else
        echo "No IC config files found in do/ic/"
    fi
else
    # Legacy single-IC path: use INFERENCE_COMPONENT_NAME from config
    ic_name="${INFERENCE_COMPONENT_NAME:-}"

    if [ -z "${ic_name}" ]; then
        echo "No inference component deployed."
        echo "Run ./do/deploy to create one."
    else
        if ! IC_JSON=$(aws sagemaker describe-inference-component \
            --inference-component-name "${ic_name}" \
            --region "${AWS_REGION}" 2>/dev/null); then
            echo "Inference Components:"
            printf "%-18s %-12s %-6s %-6s\n" "NAME" "STATUS" "GPUs" "COPIES"
            printf "%-18s %-12s %-6s %-6s\n" "default" "Not Found" "-" "-"
            echo ""
            # The script ends here; nothing after this section runs.
            exit 0
        fi

        read -r ic_status ic_gpu_count ic_copy_count <<< "$(_ic_summary "${IC_JSON}")"

        # Same accounting as the multi-IC path: per-copy GPUs x copies.
        TOTAL_GPU_USED=$(( TOTAL_GPU_USED + ic_gpu_count * ic_copy_count ))

        echo "Inference Components:"
        printf "%-18s %-12s %-6s %-6s\n" "NAME" "STATUS" "GPUs" "COPIES"
        printf "%-18s %-12s %-6s %-6s\n" "default" "${ic_status}" "${ic_gpu_count}" "${ic_copy_count}"
        _print_gpu_total
    fi
fi

echo ""
|
|
277
|
+
|
|
278
|
+
<% if (typeof enableLora !== 'undefined' && enableLora) { %>
# ============================================================
# Describe LoRA Adapters
# ============================================================
# Lists the endpoint's inference components and prints a table of those that
# declare a BaseInferenceComponentName (treated here as adapter components).
# Every lookup is best-effort: a failing aws/jq call degrades to "no data"
# rather than aborting the script under `set -e` / `pipefail`.
if [ "${ENABLE_LORA:-}" = "true" ]; then
    # Rows are terminated by a real newline so they can be printed with
    # printf '%s' (no `echo -e`, no GNU-only `head -n -1`).
    ADAPTER_ROWS=""
    ADAPTER_COUNT=0
    ADAPTER_IC_NAMES=""

    if ! command -v jq > /dev/null 2>&1; then
        echo "⚠️  jq not found — adapter details unavailable" >&2
    else
        # List all inference components on the endpoint
        ADAPTER_IC_LIST=$(aws sagemaker list-inference-components \
            --endpoint-name-equals "${ENDPOINT_NAME}" \
            --region "${AWS_REGION}" 2>/dev/null) || ADAPTER_IC_LIST=""

        if [ -n "${ADAPTER_IC_LIST}" ]; then
            # Extract IC names
            ADAPTER_IC_NAMES=$(printf '%s\n' "${ADAPTER_IC_LIST}" \
                | jq -r '.InferenceComponents[].InferenceComponentName' 2>/dev/null) || ADAPTER_IC_NAMES=""
        fi
    fi

    # Word-splitting is intended: one component name per word.
    for ic_name in ${ADAPTER_IC_NAMES}; do
        # Describe each IC to check if it's an adapter
        ic_detail=$(aws sagemaker describe-inference-component \
            --inference-component-name "${ic_name}" \
            --region "${AWS_REGION}" 2>/dev/null) || continue

        # Check if this IC has a BaseInferenceComponentName (adapter IC)
        base_ic=$(printf '%s\n' "${ic_detail}" \
            | jq -r '.Specification.BaseInferenceComponentName // empty' 2>/dev/null) || base_ic=""

        # Not an adapter IC — skip
        [ -n "${base_ic}" ] || continue

        # Extract status and artifact URL
        adapter_status=$(printf '%s\n' "${ic_detail}" \
            | jq -r '.InferenceComponentStatus // "Unknown"' 2>/dev/null) || adapter_status="Unknown"
        adapter_weights=$(printf '%s\n' "${ic_detail}" \
            | jq -r '.Specification.Container.ArtifactUrl // "N/A"' 2>/dev/null) || adapter_weights="N/A"

        # Derive display name (strip project prefix if present)
        display_name="${ic_name}"
        if [[ "${ic_name}" == "${PROJECT_NAME}-adapter-"* ]]; then
            display_name="${ic_name#"${PROJECT_NAME}-adapter-"}"
        fi

        ADAPTER_ROWS+="$(printf '%-14s%-12s%s' "${display_name}" "${adapter_status}" "${adapter_weights}")"$'\n'
        ADAPTER_COUNT=$((ADAPTER_COUNT + 1))
    done

    echo "Adapters (LoRA): [max: <%= maxLoras %> GPU / 70 CPU]"
    if [ "${ADAPTER_COUNT}" -eq 0 ]; then
        echo " No adapters deployed"
    else
        printf '%-14s%-12s%s\n' "NAME" "STATUS" "WEIGHTS"
        printf '%s' "${ADAPTER_ROWS}"
    fi
    echo ""
fi
<% } %>
|
package/templates/do/test
CHANGED
|
@@ -15,10 +15,11 @@ source "${SCRIPT_DIR}/config"
|
|
|
15
15
|
# SageMaker Real-Time Inference Testing
|
|
16
16
|
# ============================================================
|
|
17
17
|
|
|
18
|
-
# Parse arguments
|
|
19
|
-
|
|
18
|
+
# Parse arguments: ./do/test [<ic-name>]
|
|
19
|
+
IC_ARG="${1:-}"
|
|
20
20
|
|
|
21
|
-
|
|
21
|
+
# Determine test mode based on ENDPOINT_NAME in config
|
|
22
|
+
if [ -z "${ENDPOINT_NAME:-}" ]; then
|
|
22
23
|
echo "🧪 Testing local container at localhost:8080"
|
|
23
24
|
echo " Project: ${PROJECT_NAME}"
|
|
24
25
|
echo " Framework: ${FRAMEWORK}"
|
|
@@ -210,8 +211,53 @@ else
|
|
|
210
211
|
# Create temporary file for response
|
|
211
212
|
TEMP_RESPONSE=$(mktemp)
|
|
212
213
|
|
|
213
|
-
#
|
|
214
|
-
|
|
214
|
+
# Resolve inference component name
|
|
215
|
+
# Precedence: do/adapters/ → do/ic/ → legacy config
|
|
216
|
+
IC_NAME=""
|
|
217
|
+
if [ -n "${IC_ARG}" ] && [ -f "${SCRIPT_DIR}/adapters/${IC_ARG}.conf" ]; then
|
|
218
|
+
# Argument matches an adapter name — use adapter IC
|
|
219
|
+
ADAPTER_IC_NAME=""
|
|
220
|
+
source "${SCRIPT_DIR}/adapters/${IC_ARG}.conf"
|
|
221
|
+
if [ -z "${ADAPTER_IC_NAME}" ]; then
|
|
222
|
+
echo "❌ Adapter '${IC_ARG}' conf is missing ADAPTER_IC_NAME."
|
|
223
|
+
exit 1
|
|
224
|
+
fi
|
|
225
|
+
IC_NAME="${ADAPTER_IC_NAME}"
|
|
226
|
+
elif [ -n "${IC_ARG}" ]; then
|
|
227
|
+
# Explicit IC name provided as argument
|
|
228
|
+
IC_CONF="${SCRIPT_DIR}/ic/${IC_ARG}.conf"
|
|
229
|
+
if [ ! -f "${IC_CONF}" ]; then
|
|
230
|
+
echo "❌ IC config not found: do/ic/${IC_ARG}.conf"
|
|
231
|
+
exit 1
|
|
232
|
+
fi
|
|
233
|
+
IC_DEPLOYED_NAME=""
|
|
234
|
+
source "${IC_CONF}"
|
|
235
|
+
if [ -z "${IC_DEPLOYED_NAME}" ]; then
|
|
236
|
+
echo "❌ IC '${IC_ARG}' has not been deployed yet. Run ./do/deploy --ic ${IC_ARG} first."
|
|
237
|
+
exit 1
|
|
238
|
+
fi
|
|
239
|
+
IC_NAME="${IC_DEPLOYED_NAME}"
|
|
240
|
+
elif [ -d "${SCRIPT_DIR}/ic" ]; then
|
|
241
|
+
# No argument, but do/ic/ exists — use first IC alphabetically
|
|
242
|
+
IC_NAME=""
|
|
243
|
+
for conf in "${SCRIPT_DIR}"/ic/*.conf; do
|
|
244
|
+
[ -f "${conf}" ] || continue
|
|
245
|
+
IC_DEPLOYED_NAME=""
|
|
246
|
+
source "${conf}"
|
|
247
|
+
if [ -n "${IC_DEPLOYED_NAME}" ]; then
|
|
248
|
+
IC_NAME="${IC_DEPLOYED_NAME}"
|
|
249
|
+
break
|
|
250
|
+
fi
|
|
251
|
+
done
|
|
252
|
+
if [ -z "${IC_NAME}" ]; then
|
|
253
|
+
echo "❌ No ICs deployed. Run ./do/deploy first."
|
|
254
|
+
exit 1
|
|
255
|
+
fi
|
|
256
|
+
else
|
|
257
|
+
# Legacy: no do/ic/ directory, use INFERENCE_COMPONENT_NAME from do/config
|
|
258
|
+
IC_NAME="${INFERENCE_COMPONENT_NAME:-}"
|
|
259
|
+
fi
|
|
260
|
+
|
|
215
261
|
INVOKE_ARGS=(
|
|
216
262
|
--endpoint-name "${ENDPOINT_NAME}"
|
|
217
263
|
--region "${AWS_REGION}"
|
|
@@ -285,12 +331,15 @@ if [ "${TEST_MODE}" = "local" ]; then
|
|
|
285
331
|
echo " • Push to ECR: ./do/push"
|
|
286
332
|
echo " • Deploy to SageMaker: ./do/deploy"
|
|
287
333
|
else
|
|
288
|
-
echo "
|
|
289
|
-
|
|
290
|
-
echo "
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
echo "
|
|
334
|
+
echo "📋 What's next?"
|
|
335
|
+
<% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
|
|
336
|
+
echo " • Benchmark performance: ./do/benchmark"
|
|
337
|
+
<% } %>
|
|
338
|
+
<% if (typeof enableLora !== 'undefined' && enableLora) { %>
|
|
339
|
+
echo " • Add a LoRA adapter: ./do/adapter add <name> --weights s3://..."
|
|
340
|
+
<% } %>
|
|
341
|
+
echo " • Register this deployment: ./do/register"
|
|
342
|
+
echo " • View logs: ./do/logs"
|
|
294
343
|
fi
|
|
295
344
|
|
|
296
345
|
<% } else if (deploymentTarget === 'async-inference') { %>
|
|
@@ -599,13 +648,13 @@ if [ "${TEST_MODE}" = "local" ]; then
|
|
|
599
648
|
echo " • Push to ECR: ./do/push"
|
|
600
649
|
echo " • Deploy to SageMaker: ./do/deploy"
|
|
601
650
|
else
|
|
602
|
-
echo "
|
|
603
|
-
|
|
604
|
-
echo "
|
|
605
|
-
|
|
606
|
-
echo ""
|
|
607
|
-
echo "
|
|
608
|
-
echo "
|
|
651
|
+
echo "📋 What's next?"
|
|
652
|
+
<% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
|
|
653
|
+
echo " • Benchmark performance: ./do/benchmark"
|
|
654
|
+
<% } %>
|
|
655
|
+
echo " • Check async output: aws s3 ls ${ASYNC_S3_OUTPUT_PATH}"
|
|
656
|
+
echo " • Register this deployment: ./do/register"
|
|
657
|
+
echo " • View logs: ./do/logs"
|
|
609
658
|
fi
|
|
610
659
|
|
|
611
660
|
<% } else if (deploymentTarget === 'hyperpod-eks') { %>
|
|
@@ -864,13 +913,14 @@ if [ "${TEST_TARGET}" = "local" ]; then
|
|
|
864
913
|
echo " • Push to ECR: ./do/push"
|
|
865
914
|
echo " • Deploy to HyperPod: ./do/deploy"
|
|
866
915
|
else
|
|
867
|
-
echo "
|
|
868
|
-
|
|
869
|
-
echo "
|
|
870
|
-
|
|
871
|
-
echo ""
|
|
872
|
-
echo "
|
|
873
|
-
echo "
|
|
916
|
+
echo "📋 What's next?"
|
|
917
|
+
<% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
|
|
918
|
+
echo " • Benchmark performance: ./do/benchmark"
|
|
919
|
+
<% } %>
|
|
920
|
+
echo " • Check pod status: kubectl get pods -n ${HYPERPOD_NAMESPACE}"
|
|
921
|
+
echo " • View pod logs: kubectl logs -n ${HYPERPOD_NAMESPACE} -l app=${PROJECT_NAME}"
|
|
922
|
+
echo " • Register this deployment: ./do/register"
|
|
923
|
+
echo " • View logs: ./do/logs"
|
|
874
924
|
fi
|
|
875
925
|
|
|
876
926
|
<% } else if (deploymentTarget === 'batch-transform') { %>
|
|
@@ -950,8 +1000,10 @@ case "${TEST_TARGET}" in
|
|
|
950
1000
|
echo ""
|
|
951
1001
|
echo "✅ All tests passed!"
|
|
952
1002
|
echo ""
|
|
953
|
-
echo "
|
|
954
|
-
echo "
|
|
1003
|
+
echo "📋 What's next?"
|
|
1004
|
+
echo " • View results: cat batch-output/"
|
|
1005
|
+
echo " • Register this deployment: ./do/register"
|
|
1006
|
+
echo " • View logs: ./do/logs"
|
|
955
1007
|
;;
|
|
956
1008
|
InProgress)
|
|
957
1009
|
echo "⏳ Transform job is still in progress"
|
|
@@ -1144,4 +1196,4 @@ echo "Next steps:"
|
|
|
1144
1196
|
echo " • Push to ECR: ./do/push"
|
|
1145
1197
|
echo " • Deploy batch transform: ./do/deploy"
|
|
1146
1198
|
|
|
1147
|
-
<% } %>
|
|
1199
|
+
<% } %>
|