@aws/ml-container-creator 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +5 -2
- package/infra/ci-harness/buildspec.yml +60 -0
- package/package.json +1 -1
- package/servers/README.md +41 -1
- package/servers/instance-sizer/index.js +6 -0
- package/src/app.js +33 -2
- package/src/lib/config-manager.js +40 -1
- package/src/lib/deployment-entry-schema.js +16 -0
- package/src/lib/prompt-runner.js +174 -3
- package/src/lib/prompts.js +222 -2
- package/src/lib/registry-command-handler.js +12 -0
- package/templates/Dockerfile +12 -0
- package/templates/code/serving.properties +14 -0
- package/templates/do/adapter +1214 -0
- package/templates/do/adapters/.gitkeep +2 -0
- package/templates/do/add-ic +130 -0
- package/templates/do/benchmark +81 -9
- package/templates/do/clean +507 -17
- package/templates/do/config +23 -1
- package/templates/do/deploy +513 -367
- package/templates/do/ic/default.conf +32 -0
- package/templates/do/lib/endpoint-config.sh +216 -0
- package/templates/do/lib/inference-component.sh +167 -0
- package/templates/do/lib/secrets.sh +44 -0
- package/templates/do/lib/wait.sh +131 -0
- package/templates/do/logs +107 -27
- package/templates/do/optimize +528 -0
- package/templates/do/register +111 -1
- package/templates/do/status +337 -0
- package/templates/do/test +80 -28
|
@@ -0,0 +1,528 @@
|
|
|
1
|
+
#!/bin/bash
|
|
2
|
+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
# do/optimize — Run SageMaker AI Inference Recommendations to find optimal
|
|
6
|
+
# instance types and model configurations for your workload.
|
|
7
|
+
# Wraps CreateAIRecommendationJob / DescribeAIRecommendationJob.
|
|
8
|
+
|
|
9
|
+
# Strict mode: abort on a failing command (-e), on expansion of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# ── Source project configuration ──────────────────────────────────────────────
# Resolve the directory that holds this script so the sibling files are found
# regardless of the caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# NOTE(review): the rest of the script reads PROJECT_NAME, AWS_REGION, ROLE_ARN
# and MODEL_NAME and calls _update_config_var; presumably these two files
# provide them — confirm.
. "${SCRIPT_DIR}/config"
. "${SCRIPT_DIR}/lib/wait.sh"
|
|
17
|
+
|
|
18
|
+
# ── Parse flags ───────────────────────────────────────────────────────────────
# Globals written: GOAL, INSTANCES_ARG, FORCE
GOAL=""
INSTANCES_ARG=""
FORCE=false

# Print the usage text to stdout.
_optimize_usage() {
  cat <<'EOF'
Usage: ./do/optimize --goal <cost|latency|throughput> [--instances type1,type2] [--force]

Run SageMaker AI Inference Recommendations to find optimal
instance types and model configurations for your workload.

Options:
 --goal <goal> Optimization goal: cost, latency, or throughput (required)
 --instances <list> Comma-separated instance types to evaluate
 Defaults to INSTANCE_POOLS entries or INSTANCE_TYPE from do/config
 --force Create a new recommendation job even if one exists

Examples:
 ./do/optimize --goal throughput
 ./do/optimize --goal cost --instances ml.g6e.48xlarge,ml.p5.48xlarge
 ./do/optimize --goal latency --force

Idempotency:
 If OPTIMIZE_JOB_NAME is set in do/config and the job is still running,
 re-running without --force will resume waiting for the existing job.

Prerequisites:
 • MODEL_NAME must be set in do/config (HuggingFace model ID or S3 path)
 • AWS credentials must be configured
EOF
}

# Parse the command line into GOAL, INSTANCES_ARG and FORCE.
# Arguments: the script's positional parameters.
# Exits 0 after printing usage for --help/-h, and exits 1 with a message when
# --goal or --instances is given without a value. (Previously the second
# `shift` failed in that case and `set -e` ended the script with no output.)
# Unrecognized arguments are still skipped, but are now reported on stderr.
_optimize_parse_args() {
  while [ $# -gt 0 ]; do
    case "$1" in
      --goal|--instances)
        if [ $# -lt 2 ]; then
          echo "❌ $1 requires a value" >&2
          echo " Run ./do/optimize --help for usage." >&2
          exit 1
        fi
        if [ "$1" = "--goal" ]; then
          GOAL="$2"
        else
          INSTANCES_ARG="$2"
        fi
        shift 2
        ;;
      --force)
        FORCE=true
        shift
        ;;
      --help|-h)
        _optimize_usage
        exit 0
        ;;
      *)
        echo "⚠️ Ignoring unrecognized argument: $1" >&2
        shift
        ;;
    esac
  done
}

_optimize_parse_args "$@"
|
|
56
|
+
|
|
57
|
+
# ── Validate goal ─────────────────────────────────────────────────────────────
# GOAL must be one of the three supported optimization targets. A missing
# value and an unsupported value each print their own hint and exit 1.
case "${GOAL}" in
  cost|latency|throughput)
    ;;
  "")
    echo "❌ --goal is required. Choose one of: cost, latency, throughput"
    echo " Example: ./do/optimize --goal throughput"
    exit 1
    ;;
  *)
    echo "❌ Invalid goal: ${GOAL}"
    echo " Valid goals: cost, latency, throughput"
    exit 1
    ;;
esac
|
|
72
|
+
|
|
73
|
+
# ── Verify AWS CLI v2 ─────────────────────────────────────────────────────────
# Capture the version banner once and match it in-shell. Piping into
# `grep -q` under `set -o pipefail` can report a false failure when grep
# closes the pipe before the writer finishes, and the old form also ran
# `aws --version` a second time just to print the error message.
# `|| true` keeps `set -e` from aborting when the aws binary is missing; the
# shell's "command not found" text is then what gets reported as "Detected".
AWS_CLI_VERSION_OUTPUT="$(aws --version 2>&1 || true)"

case "${AWS_CLI_VERSION_OUTPUT}" in
  *aws-cli/2*)
    ;;
  *)
    echo "❌ AWS CLI v2 is required for inference recommendations."
    # Only the first line of the captured output is shown.
    echo " Detected: ${AWS_CLI_VERSION_OUTPUT%%$'\n'*}"
    echo ""
    echo " Install CLI v2: https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html"
    exit 1
    ;;
esac
|
|
81
|
+
|
|
82
|
+
# ── Resolve model name ────────────────────────────────────────────────────────
# MODEL_NAME must be non-empty; otherwise the run stops with exit status 1.
[ -n "${MODEL_NAME:-}" ] || {
  echo "❌ MODEL_NAME is not set in do/config"
  echo " Set MODEL_NAME to a HuggingFace model ID or S3 model path."
  exit 1
}

# Banner describing what is about to be optimized.
printf '%s\n' \
  "🔍 Inference Recommendations" \
  " Project: ${PROJECT_NAME}" \
  " Model: ${MODEL_NAME}" \
  " Goal: ${GOAL}"
|
|
93
|
+
|
|
94
|
+
# ── Resolve instance types ────────────────────────────────────────────────────
# Priority: --instances flag > INSTANCE_POOLS > INSTANCE_TYPE
# Result: INSTANCE_TYPES holds a non-empty comma-separated list, or the script
# exits 1.

# Print the "InstanceType" values found in a pools JSON string ($1), joined
# with commas. POSIX character classes replace `\s`, which is a GNU extension
# that POSIX grep/sed do not guarantee. Prints nothing when no entry matches.
_pool_instance_types() {
  printf '%s\n' "$1" \
    | grep -oE '"InstanceType"[[:space:]]*:[[:space:]]*"[^"]+"' \
    | sed -E 's/^"InstanceType"[[:space:]]*:[[:space:]]*"//; s/"$//' \
    | paste -sd ',' -
}

INSTANCE_TYPES=""

if [ -n "${INSTANCES_ARG}" ]; then
  # From --instances flag (comma-separated)
  INSTANCE_TYPES="${INSTANCES_ARG}"
  echo " Instances (from --instances): ${INSTANCE_TYPES}"
else
  if [ -n "${INSTANCE_POOLS:-}" ]; then
    # grep exits non-zero when nothing matches; under pipefail that would
    # otherwise abort the script, so tolerate it and inspect the result.
    INSTANCE_TYPES="$(_pool_instance_types "${INSTANCE_POOLS}" || true)"
    if [ -n "${INSTANCE_TYPES}" ]; then
      echo " Instances (from INSTANCE_POOLS): ${INSTANCE_TYPES}"
    else
      echo "⚠️ INSTANCE_POOLS is set but contains no InstanceType entries" >&2
    fi
  fi

  # Fall back to INSTANCE_TYPE when the pools produced nothing. Previously an
  # unparseable INSTANCE_POOLS left the list empty and the script continued.
  if [ -z "${INSTANCE_TYPES}" ] && [ -n "${INSTANCE_TYPE:-}" ]; then
    INSTANCE_TYPES="${INSTANCE_TYPE}"
    echo " Instances (from INSTANCE_TYPE): ${INSTANCE_TYPES}"
  fi
fi

if [ -z "${INSTANCE_TYPES}" ]; then
  echo "❌ No instance types available."
  echo " Provide --instances flag, or set INSTANCE_POOLS or INSTANCE_TYPE in do/config."
  exit 1
fi
|
|
114
|
+
|
|
115
|
+
# ── Resolve workload parameters from benchmark config (if available) ──────────
# Each value comes from its BENCHMARK_* variable when that is set and
# non-empty, otherwise from the literal default shown here.
CONCURRENCY=${BENCHMARK_CONCURRENCY:-1}
INPUT_TOKENS=${BENCHMARK_INPUT_TOKENS_MEAN:-256}
OUTPUT_TOKENS=${BENCHMARK_OUTPUT_TOKENS_MEAN:-256}

# printf reuses the format for every label/value pair.
printf ' %s: %s\n' \
  "Concurrency" "${CONCURRENCY}" \
  "Input tokens" "${INPUT_TOKENS}" \
  "Output tokens" "${OUTPUT_TOKENS}"
echo ""
|
|
124
|
+
|
|
125
|
+
# ── Helper: update a variable in do/config ────────────────────────────────────
# Arguments: $1 - variable name, $2 - new value
# Forwards both arguments unchanged to _update_config_var, which is defined
# outside this section (presumably by one of the sourced files — confirm).
_update_optimize_var() {
  local var_name="$1"
  local var_value="$2"
  _update_config_var "${var_name}" "${var_value}"
}
|
|
129
|
+
|
|
130
|
+
# ── Idempotency: Check for existing recommendation job ────────────────────────
# Unless --force was given, look up the job named by OPTIMIZE_JOB_NAME and
# decide whether to keep waiting on it, reuse its results, or stop.
# Globals written: RESUME_EXISTING, and JOB_STATUS when the job has completed.
RESUME_EXISTING=false

# Print one field of the recorded job. $1 is the --query expression.
# stderr is discarded; the exit status is that of the aws call.
_describe_recorded_job() {
  aws sagemaker describe-ai-recommendation-job \
    --job-name "${OPTIMIZE_JOB_NAME}" \
    --region "${AWS_REGION}" \
    --query "$1" \
    --output text 2>/dev/null
}

if [[ "${FORCE}" == false && -n "${OPTIMIZE_JOB_NAME:-}" ]]; then
  # A failed lookup leaves the status empty, which falls through to the
  # "create a new job" path below.
  EXISTING_STATUS="$(_describe_recorded_job 'Status')" || EXISTING_STATUS=""

  if [[ "${EXISTING_STATUS}" =~ ^(IN_PROGRESS|PENDING|STARTING)$ ]]; then
    echo "📊 Resuming existing recommendation job: ${OPTIMIZE_JOB_NAME}"
    echo " Status: ${EXISTING_STATUS}"
    echo " (use --force to start a new job instead)"
    echo ""
    RESUME_EXISTING=true
  elif [[ "${EXISTING_STATUS}" == "COMPLETED" ]]; then
    echo "📊 Previous recommendation job already completed: ${OPTIMIZE_JOB_NAME}"
    echo " (use --force to start a new job)"
    echo ""
    RESUME_EXISTING=true
    # Pre-setting the status lets the polling section be skipped.
    JOB_STATUS="COMPLETED"
  elif [[ "${EXISTING_STATUS}" == "FAILED" || "${EXISTING_STATUS}" == "STOPPED" ]]; then
    FAILURE_REASON="$(_describe_recorded_job 'FailureReason')" || FAILURE_REASON="unknown"
    echo "⚠️ Previous recommendation job ${EXISTING_STATUS}: ${OPTIMIZE_JOB_NAME}"
    # With --output text the CLI prints the literal "None" for an absent field.
    if [[ -n "${FAILURE_REASON}" && "${FAILURE_REASON}" != "None" ]]; then
      echo " Reason: ${FAILURE_REASON}"
    fi
    echo " Use --force to start a new job."
    exit 1
  fi
  # Any other status: job doesn't exist or can't be described — proceed with new job
fi
|
|
173
|
+
|
|
174
|
+
# ── Create recommendation job ─────────────────────────────────────────────────

# Escape backslashes and double quotes so $1 can be embedded in a JSON string.
# Control characters are not handled; the values passed here are model
# identifiers and instance type names.
_json_escape() {
  local text="$1"
  text=${text//\\/\\\\}
  text=${text//\"/\\\"}
  printf '%s' "${text}"
}

# Convert a comma-separated list ($1) into a JSON array of strings.
# Surrounding whitespace is trimmed with parameter expansion (the previous
# `xargs` trim also interpreted quotes and backslashes), and empty items such
# as those produced by "a,,b" or a trailing comma are dropped instead of
# becoming "" entries. Prints "[]" for an empty list.
_instance_types_json() {
  local csv="$1"
  local item
  local joined=""
  local -a parts=()

  IFS=',' read -ra parts <<< "${csv}"
  # The ${arr[@]+...} form keeps `set -u` quiet on bash versions that treat
  # an empty array expansion as unbound.
  for item in ${parts[@]+"${parts[@]}"}; do
    item="${item#"${item%%[![:space:]]*}"}"   # strip leading whitespace
    item="${item%"${item##*[![:space:]]}"}"   # strip trailing whitespace
    [ -n "${item}" ] || continue
    joined="${joined:+${joined},}\"$(_json_escape "${item}")\""
  done
  printf '[%s]' "${joined}"
}

if [ "${RESUME_EXISTING}" = false ]; then
  # Timestamped name: one job per invocation.
  OPTIMIZE_JOB_NAME="${PROJECT_NAME}-optimize-$(date +%Y%m%d-%H%M%S)"

  echo "🚀 Creating AI Recommendation Job: ${OPTIMIZE_JOB_NAME}"

  # Build instance type list as JSON array
  INSTANCE_TYPES_JSON="$(_instance_types_json "${INSTANCE_TYPES}")"
  if [ "${INSTANCE_TYPES_JSON}" = "[]" ]; then
    echo "❌ No usable instance types in: ${INSTANCE_TYPES}"
    exit 1
  fi

  # Build job input config
  # The model is specified as either a HuggingFace model ID or S3 path
  MODEL_NAME_JSON="$(_json_escape "${MODEL_NAME}")"
  if [[ "${MODEL_NAME}" == s3://* ]]; then
    MODEL_SOURCE_JSON="{\"S3DataSource\":{\"S3Uri\":\"${MODEL_NAME_JSON}\"}}"
  else
    MODEL_SOURCE_JSON="{\"ModelName\":\"${MODEL_NAME_JSON}\"}"
  fi

  # Build workload config
  # NOTE(review): these three values are inserted as bare JSON numbers —
  # assumes the BENCHMARK_* settings are numeric; confirm against do/config.
  WORKLOAD_JSON="{\"Concurrency\":${CONCURRENCY},\"InputTokens\":${INPUT_TOKENS},\"OutputTokens\":${OUTPUT_TOKENS}}"

  # Build the full job specification
  JOB_INPUT="{\"ModelSource\":${MODEL_SOURCE_JSON},\"Workload\":${WORKLOAD_JSON},\"InstanceTypes\":${INSTANCE_TYPES_JSON},\"OptimizationGoal\":\"${GOAL}\"}"

  if ! aws sagemaker create-ai-recommendation-job \
    --job-name "${OPTIMIZE_JOB_NAME}" \
    --role-arn "${ROLE_ARN}" \
    --input-config "${JOB_INPUT}" \
    --region "${AWS_REGION}"; then
    echo "❌ Failed to create AI Recommendation Job"
    echo " Check that:"
    echo " • The execution role has sagemaker:CreateAIRecommendationJob permission"
    echo " • The model name or S3 path is valid: ${MODEL_NAME}"
    echo " • The instance types are valid: ${INSTANCE_TYPES}"
    echo " • The API is available in region: ${AWS_REGION}"
    exit 1
  fi

  echo "✅ Recommendation job created: ${OPTIMIZE_JOB_NAME}"

  # Save job name to do/config for idempotency on re-run
  _update_optimize_var "OPTIMIZE_JOB_NAME" "${OPTIMIZE_JOB_NAME}"
  echo ""
fi
|
|
230
|
+
|
|
231
|
+
# ── Poll for completion ───────────────────────────────────────────────────────
# Waits until the job reports COMPLETED, FAILED or STOPPED, checking every
# POLL_INTERVAL seconds for at most MAX_POLL_ATTEMPTS checks. Skipped entirely
# when JOB_STATUS already holds one of those three values.
POLL_INTERVAL=30
MAX_POLL_ATTEMPTS=120 # 60 minutes max (120 * 30s)

case "${JOB_STATUS:-}" in
  COMPLETED|FAILED|STOPPED)
    # Already terminal — nothing to wait for.
    ;;
  *)
    echo "⏳ Waiting for recommendation job to complete..."
    echo " Polling every ${POLL_INTERVAL}s (max ${MAX_POLL_ATTEMPTS} attempts = 60 min)"
    echo ""

    POLL_COUNT=0
    JOB_STATUS=""

    while (( POLL_COUNT < MAX_POLL_ATTEMPTS )); do
      # A failed describe call ends the run; the job itself is left untouched.
      if ! JOB_STATUS=$(aws sagemaker describe-ai-recommendation-job \
        --job-name "${OPTIMIZE_JOB_NAME}" \
        --region "${AWS_REGION}" \
        --query 'Status' \
        --output text 2>/dev/null); then
        echo "⚠️ Failed to describe recommendation job (credentials may have expired)"
        echo " Re-run to check status:"
        echo " aws sagemaker describe-ai-recommendation-job --job-name ${OPTIMIZE_JOB_NAME} --region ${AWS_REGION}"
        exit 1
      fi

      if [[ "${JOB_STATUS}" == "COMPLETED" ]]; then
        echo "✅ Recommendation job completed!"
        break
      elif [[ "${JOB_STATUS}" == "FAILED" ]]; then
        echo "❌ Recommendation job failed"
        break
      elif [[ "${JOB_STATUS}" == "STOPPED" ]]; then
        echo "⚠️ Recommendation job was stopped"
        break
      fi

      # Still running: report progress, then wait for the next check.
      POLL_COUNT=$((POLL_COUNT + 1))
      ELAPSED=$((POLL_COUNT * POLL_INTERVAL))
      echo " $(date +%H:%M:%S) Status: ${JOB_STATUS} (${ELAPSED}s elapsed)"
      sleep ${POLL_INTERVAL}
    done

    # The counter only reaches the limit when no terminal status was seen.
    if (( POLL_COUNT >= MAX_POLL_ATTEMPTS )); then
      echo ""
      echo "⚠️ Recommendation job timed out after 60 minutes (status: ${JOB_STATUS})"
      echo " The job may still be running. Re-run ./do/optimize to resume waiting."
      exit 1
    fi
    ;;
esac # end of polling conditional
|
|
286
|
+
|
|
287
|
+
echo ""
|
|
288
|
+
|
|
289
|
+
# ── Display results ───────────────────────────────────────────────────────────
# All shell values reach the embedded Python through argv or stdin. They used
# to be pasted into the Python source text, so a quote, backslash or brace in a
# model name or path could break or alter the program.

# Render the recommendation table from a job description (JSON on stdin) and
# write the extracted result list to a file for the menu below.
# Arguments: $1 - job name, $2 - goal, $3 - model name, $4 - results file path
# Outputs:   the table on stdout; no file is written when there are no results.
_render_recommendations() {
  python3 -c '
import json
import sys

job_name, goal, model_name, results_path = sys.argv[1:5]
data = json.load(sys.stdin)
results = data.get("Results", data.get("InferenceRecommendations", []))

# If results is a dict with a list inside, extract it
if isinstance(results, dict):
    results = results.get("Recommendations", results.get("InferenceRecommendations", []))

# Checked after unwrapping so an empty inner list is reported as well.
if not results:
    print(" No recommendations returned.")
    sys.exit(0)

print("╔══════════════════════════════════════════════════════════════════════════╗")
print("║ SageMaker AI Inference Recommendations ║")
print("╠══════════════════════════════════════════════════════════════════════════╣")
print(f"║ Job: {job_name}")
print(f"║ Goal: {goal}")
print(f"║ Model: {model_name}")
print("╠══════════════════════════════════════════════════════════════════════════╣")

for i, rec in enumerate(results, 1):
    instance_type = rec.get("InstanceType", rec.get("EndpointConfiguration", {}).get("InstanceType", "N/A"))
    model_config = rec.get("ModelConfiguration", {})
    model_package_arn = model_config.get("ModelPackageArn", rec.get("ModelPackageArn", "N/A"))
    inference_spec = model_config.get("InferenceSpecificationName", rec.get("InferenceSpecificationName", "N/A"))

    metrics = rec.get("Metrics", rec.get("RecommendationMetrics", {}))
    ttft = metrics.get("TimeToFirstToken", metrics.get("TTFT", "N/A"))
    itl = metrics.get("InterTokenLatency", metrics.get("ITL", "N/A"))
    throughput = metrics.get("Throughput", metrics.get("MaxInvocations", "N/A"))
    cost = metrics.get("CostPerHour", metrics.get("CostPerInference", "N/A"))

    rank_marker = " ← TOP" if i == 1 else ""
    print("║")
    print(f"║ #{i}{rank_marker}")
    print(f"║ Instance Type: {instance_type}")
    print(f"║ ModelPackageArn: {model_package_arn}")
    print(f"║ InferenceSpec: {inference_spec}")
    print("║ ────────────────────────────────────────")
    print(f"║ TTFT (ms): {ttft}")
    print(f"║ ITL (ms): {itl}")
    print(f"║ Throughput: {throughput}")
    print(f"║ Cost: {cost}")

print("║")
print("╚══════════════════════════════════════════════════════════════════════════╝")

# Hand the result list to the interactive menu.
with open(results_path, "w") as f:
    json.dump(results, f)
' "$@"
}

# Print one field of the first (top-ranked) entry in a results file.
# Arguments: $1 - results file, $2 - ModelPackageArn | InferenceSpecificationName | InstanceType
# InstanceType is read from the entry itself, then from EndpointConfiguration;
# the other fields are read from ModelConfiguration, then from the entry.
# Prints an empty line when the field is absent; Python errors are silenced.
_top_result_field() {
  python3 -c '
import json
import sys

path, field = sys.argv[1:3]
with open(path) as f:
    results = json.load(f)
if results:
    top = results[0]
    if field == "InstanceType":
        print(top.get(field, top.get("EndpointConfiguration", {}).get(field, "")))
    else:
        print(top.get("ModelConfiguration", {}).get(field, top.get(field, "")))
' "$1" "$2" 2>/dev/null
}

# Print an INSTANCE_POOLS JSON array built from every entry in a results file
# ($1): InstanceType, Priority (1-based rank in the file) and, when a
# ModelPackageArn is present, ModelName. Entries without an instance type are
# skipped.
_pools_json_from_results() {
  python3 -c '
import json
import sys

with open(sys.argv[1]) as f:
    results = json.load(f)
pools = []
for i, r in enumerate(results, 1):
    instance_type = r.get("InstanceType", r.get("EndpointConfiguration", {}).get("InstanceType", ""))
    if not instance_type:
        continue
    entry = {"InstanceType": instance_type, "Priority": i}
    mc = r.get("ModelConfiguration", {})
    model_name = mc.get("ModelPackageArn", r.get("ModelPackageArn", ""))
    if model_name:
        entry["ModelName"] = model_name
    pools.append(entry)
print(json.dumps(pools))
' "$1" 2>/dev/null
}

if [ "${JOB_STATUS}" = "COMPLETED" ]; then
  echo "📊 Fetching recommendation results..."

  RESULTS_FILE="${SCRIPT_DIR}/.optimize-results.json"
  # Remove leftovers from an interrupted run so that old results can never be
  # offered as if they belonged to this job.
  rm -f "${RESULTS_FILE}"

  # Get the full job description with results
  JOB_DESCRIPTION=$(aws sagemaker describe-ai-recommendation-job \
    --job-name "${OPTIMIZE_JOB_NAME}" \
    --region "${AWS_REGION}" \
    --output json 2>/dev/null) || {
    echo "❌ Failed to fetch recommendation results"
    exit 1
  }

  # Display results using python for JSON parsing
  if command -v python3 &>/dev/null; then
    _render_recommendations "${OPTIMIZE_JOB_NAME}" "${GOAL}" "${MODEL_NAME}" "${RESULTS_FILE}" \
      <<< "${JOB_DESCRIPTION}"
  else
    echo " (python3 not available — showing raw JSON)"
    # Here-string instead of `echo | head`: with pipefail, head closing the
    # pipe early could fail the pipeline and abort the script.
    head -100 <<< "${JOB_DESCRIPTION}"
  fi

  echo ""

  # ── Interactive choices ───────────────────────────────────────────────────
  # The results file exists only when python3 rendered at least one result.
  if [ -f "${RESULTS_FILE}" ]; then
    echo "What would you like to do with these results?"
    echo ""
    echo " 1) Deploy top recommendation — update do/config with optimized model"
    echo " 2) Set up instance pools — build INSTANCE_POOLS with all results"
    echo " 3) Save for later — store ModelPackageArn in do/config"
    echo ""
    printf "Choose [1/2/3]: "
    # EOF on stdin (non-interactive run) counts as "no choice" instead of
    # aborting under `set -e` before the results file is cleaned up.
    read -r CHOICE || CHOICE=""

    case "${CHOICE}" in
      1)
        echo ""
        echo "📦 Deploying top recommendation..."
        TOP_MODEL_PACKAGE_ARN="$(_top_result_field "${RESULTS_FILE}" ModelPackageArn)" || TOP_MODEL_PACKAGE_ARN=""
        TOP_INFERENCE_SPEC="$(_top_result_field "${RESULTS_FILE}" InferenceSpecificationName)" || TOP_INFERENCE_SPEC=""
        TOP_INSTANCE_TYPE="$(_top_result_field "${RESULTS_FILE}" InstanceType)" || TOP_INSTANCE_TYPE=""

        if [ -n "${TOP_MODEL_PACKAGE_ARN}" ]; then
          _update_optimize_var "OPTIMIZE_MODEL_PACKAGE_ARN" "${TOP_MODEL_PACKAGE_ARN}"
          echo " ✅ OPTIMIZE_MODEL_PACKAGE_ARN set in do/config"
        fi
        if [ -n "${TOP_INFERENCE_SPEC}" ]; then
          _update_optimize_var "OPTIMIZE_INFERENCE_SPEC" "${TOP_INFERENCE_SPEC}"
          echo " ✅ OPTIMIZE_INFERENCE_SPEC set in do/config"
        fi
        if [ -n "${TOP_INSTANCE_TYPE}" ]; then
          _update_optimize_var "INSTANCE_TYPE" "${TOP_INSTANCE_TYPE}"
          echo " ✅ INSTANCE_TYPE updated to: ${TOP_INSTANCE_TYPE}"
        fi
        echo ""
        echo "✅ Top recommendation applied to do/config"
        echo " Run ./do/deploy to deploy with the optimized configuration."
        ;;
      2)
        echo ""
        echo "📦 Setting up instance pools from all results..."
        POOLS_JSON="$(_pools_json_from_results "${RESULTS_FILE}")" || POOLS_JSON=""

        if [ -n "${POOLS_JSON}" ] && [ "${POOLS_JSON}" != "[]" ]; then
          _update_optimize_var "INSTANCE_POOLS" "${POOLS_JSON}"
          echo " ✅ INSTANCE_POOLS set in do/config"
          echo " Pools: ${POOLS_JSON}"
          echo ""
          echo "✅ Instance pools configured from recommendation results."
          echo " Each pool entry includes ModelName for ModelNameOverride support."
          echo " Run ./do/deploy to deploy with heterogeneous instance pools."
        else
          echo " ⚠️ Could not build instance pools from results"
        fi
        ;;
      3)
        echo ""
        echo "📦 Saving results for later..."
        # Store the top ModelPackageArn
        SAVE_MODEL_PACKAGE_ARN="$(_top_result_field "${RESULTS_FILE}" ModelPackageArn)" || SAVE_MODEL_PACKAGE_ARN=""

        if [ -n "${SAVE_MODEL_PACKAGE_ARN}" ]; then
          _update_optimize_var "OPTIMIZE_MODEL_PACKAGE_ARN" "${SAVE_MODEL_PACKAGE_ARN}"
          echo " ✅ OPTIMIZE_MODEL_PACKAGE_ARN saved to do/config"
        fi
        echo ""
        echo "✅ Results saved. You can apply them later by editing do/config."
        ;;
      *)
        echo " No action taken. Results are available in the job output."
        ;;
    esac

    # Clean up temp results file
    rm -f "${RESULTS_FILE}"
  fi

elif [ "${JOB_STATUS}" = "FAILED" ]; then
  # No job description has been fetched on this path, so ask the service for
  # the reason directly.
  FAILURE_REASON=$(aws sagemaker describe-ai-recommendation-job \
    --job-name "${OPTIMIZE_JOB_NAME}" \
    --region "${AWS_REGION}" \
    --query 'FailureReason' \
    --output text 2>/dev/null) || FAILURE_REASON="unknown"

  echo "❌ Recommendation job failed"
  echo " Reason: ${FAILURE_REASON}"
  echo ""
  echo " Debug:"
  echo " aws sagemaker describe-ai-recommendation-job --job-name ${OPTIMIZE_JOB_NAME} --region ${AWS_REGION}"

elif [ "${JOB_STATUS}" = "STOPPED" ]; then
  echo "⚠️ Recommendation job was stopped before completion"
  echo " No results available."
fi

echo ""
echo "📋 Summary:"
echo " Recommendation Job: ${OPTIMIZE_JOB_NAME}"
echo " Status: ${JOB_STATUS}"
echo " Goal: ${GOAL}"
echo ""

# Report an unsuccessful job through the exit status, matching the exit 1 that
# the idempotency check uses for the same states.
case "${JOB_STATUS}" in
  FAILED|STOPPED) exit 1 ;;
esac
|
package/templates/do/register
CHANGED
|
@@ -89,6 +89,103 @@ case "${STATUS}" in
|
|
|
89
89
|
;;
|
|
90
90
|
esac
|
|
91
91
|
|
|
92
|
+
# ============================================================
|
|
93
|
+
# Build IC list from do/ic/ directory (multi-IC support)
|
|
94
|
+
# ============================================================
|
|
95
|
+
|
|
96
|
+
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
97
|
+
IC_LIST_JSON="[]"
|
|
98
|
+
if [ -d "${SCRIPT_DIR}/ic" ]; then
|
|
99
|
+
# Build IC list from all conf files (alphabetical order)
|
|
100
|
+
IC_ENTRIES=""
|
|
101
|
+
IC_COUNT=0
|
|
102
|
+
for conf in "${SCRIPT_DIR}"/ic/*.conf; do
|
|
103
|
+
[ -f "${conf}" ] || continue
|
|
104
|
+
|
|
105
|
+
# Source the IC config to get its variables
|
|
106
|
+
(
|
|
107
|
+
# Subshell to avoid polluting current environment
|
|
108
|
+
source "${conf}" 2>/dev/null
|
|
109
|
+
echo "${IC_DEPLOYED_NAME:-}|${IC_IMAGE_TAG:-}|${IC_GPU_COUNT:-1}|${IC_COPY_COUNT:-1}"
|
|
110
|
+
) > /tmp/ic_entry_$$ 2>/dev/null
|
|
111
|
+
|
|
112
|
+
IC_ENTRY=$(cat /tmp/ic_entry_$$ 2>/dev/null || echo "|||")
|
|
113
|
+
rm -f /tmp/ic_entry_$$
|
|
114
|
+
|
|
115
|
+
IC_BASENAME=$(basename "${conf}" .conf)
|
|
116
|
+
IC_ENTRY_IMAGE=$(echo "${IC_ENTRY}" | cut -d'|' -f2)
|
|
117
|
+
IC_ENTRY_GPU=$(echo "${IC_ENTRY}" | cut -d'|' -f3)
|
|
118
|
+
IC_ENTRY_COPY=$(echo "${IC_ENTRY}" | cut -d'|' -f4)
|
|
119
|
+
|
|
120
|
+
if [ -n "${IC_ENTRIES}" ]; then
|
|
121
|
+
IC_ENTRIES="${IC_ENTRIES},"
|
|
122
|
+
fi
|
|
123
|
+
IC_ENTRIES="${IC_ENTRIES}{\"name\":\"${IC_BASENAME}\",\"image\":\"${IC_ENTRY_IMAGE}\",\"gpuCount\":${IC_ENTRY_GPU:-1},\"copyCount\":${IC_ENTRY_COPY:-1}}"
|
|
124
|
+
IC_COUNT=$((IC_COUNT + 1))
|
|
125
|
+
done
|
|
126
|
+
|
|
127
|
+
if [ "${CI_MODE}" = true ] && [ ${IC_COUNT} -gt 1 ]; then
|
|
128
|
+
# CI mode: only include the first IC (alphabetically) to keep CI costs down
|
|
129
|
+
FIRST_CONF=$(ls "${SCRIPT_DIR}"/ic/*.conf 2>/dev/null | head -1)
|
|
130
|
+
if [ -n "${FIRST_CONF}" ]; then
|
|
131
|
+
(
|
|
132
|
+
source "${FIRST_CONF}" 2>/dev/null
|
|
133
|
+
echo "${IC_DEPLOYED_NAME:-}|${IC_IMAGE_TAG:-}|${IC_GPU_COUNT:-1}|${IC_COPY_COUNT:-1}"
|
|
134
|
+
) > /tmp/ic_first_$$ 2>/dev/null
|
|
135
|
+
|
|
136
|
+
FIRST_ENTRY=$(cat /tmp/ic_first_$$ 2>/dev/null || echo "|||")
|
|
137
|
+
rm -f /tmp/ic_first_$$
|
|
138
|
+
|
|
139
|
+
FIRST_BASENAME=$(basename "${FIRST_CONF}" .conf)
|
|
140
|
+
FIRST_IMAGE=$(echo "${FIRST_ENTRY}" | cut -d'|' -f2)
|
|
141
|
+
FIRST_GPU=$(echo "${FIRST_ENTRY}" | cut -d'|' -f3)
|
|
142
|
+
FIRST_COPY=$(echo "${FIRST_ENTRY}" | cut -d'|' -f4)
|
|
143
|
+
|
|
144
|
+
IC_LIST_JSON="[{\"name\":\"${FIRST_BASENAME}\",\"image\":\"${FIRST_IMAGE}\",\"gpuCount\":${FIRST_GPU:-1},\"copyCount\":${FIRST_COPY:-1}}]"
|
|
145
|
+
fi
|
|
146
|
+
else
|
|
147
|
+
IC_LIST_JSON="[${IC_ENTRIES}]"
|
|
148
|
+
fi
|
|
149
|
+
else
|
|
150
|
+
# Legacy: single IC from do/config
|
|
151
|
+
IC_LIST_JSON="[{\"name\":\"default\",\"image\":\"${PROJECT_NAME}-latest\",\"gpuCount\":${IC_GPU_COUNT:-1},\"copyCount\":${IC_COPY_COUNT:-1}}]"
|
|
152
|
+
fi
|
|
153
|
+
|
|
154
|
+
# Append adapter entries from do/adapters/*.conf
|
|
155
|
+
ADAPTER_COUNT=0
|
|
156
|
+
if [ -d "${SCRIPT_DIR}/adapters" ]; then
|
|
157
|
+
ADAPTER_ENTRIES=""
|
|
158
|
+
for conf in "${SCRIPT_DIR}"/adapters/*.conf; do
|
|
159
|
+
[ -f "${conf}" ] || continue
|
|
160
|
+
[[ "$(basename "${conf}")" == ".gitkeep" ]] && continue
|
|
161
|
+
|
|
162
|
+
ADAPTER_NAME_VAL=""
|
|
163
|
+
ADAPTER_WEIGHTS_VAL=""
|
|
164
|
+
ADAPTER_IC_VAL=""
|
|
165
|
+
eval "$(grep '^export ADAPTER_NAME=' "${conf}" 2>/dev/null)"
|
|
166
|
+
eval "$(grep '^export ADAPTER_WEIGHTS_URI=' "${conf}" 2>/dev/null)"
|
|
167
|
+
eval "$(grep '^export ADAPTER_IC_NAME=' "${conf}" 2>/dev/null)"
|
|
168
|
+
ADAPTER_NAME_VAL="${ADAPTER_NAME:-$(basename "${conf}" .conf)}"
|
|
169
|
+
ADAPTER_WEIGHTS_VAL="${ADAPTER_WEIGHTS_URI:-}"
|
|
170
|
+
ADAPTER_IC_VAL="${ADAPTER_IC_NAME:-}"
|
|
171
|
+
|
|
172
|
+
if [ -n "${ADAPTER_ENTRIES}" ]; then
|
|
173
|
+
ADAPTER_ENTRIES="${ADAPTER_ENTRIES},"
|
|
174
|
+
fi
|
|
175
|
+
ADAPTER_ENTRIES="${ADAPTER_ENTRIES}{\"name\":\"${ADAPTER_NAME_VAL}\",\"isAdapter\":true,\"baseIcName\":\"${ADAPTER_IC_VAL}\",\"artifactUrl\":\"${ADAPTER_WEIGHTS_VAL}\",\"gpuCount\":0,\"copyCount\":1}"
|
|
176
|
+
ADAPTER_COUNT=$((ADAPTER_COUNT + 1))
|
|
177
|
+
unset ADAPTER_NAME ADAPTER_WEIGHTS_URI ADAPTER_IC_NAME
|
|
178
|
+
done
|
|
179
|
+
|
|
180
|
+
if [ -n "${ADAPTER_ENTRIES}" ] && [ "${IC_LIST_JSON}" != "[]" ]; then
|
|
181
|
+
# Append adapters to existing IC list
|
|
182
|
+
IC_LIST_JSON="${IC_LIST_JSON%]},${ADAPTER_ENTRIES}]"
|
|
183
|
+
elif [ -n "${ADAPTER_ENTRIES}" ]; then
|
|
184
|
+
IC_LIST_JSON="[${ADAPTER_ENTRIES}]"
|
|
185
|
+
fi
|
|
186
|
+
fi
|
|
187
|
+
<% } %>
|
|
188
|
+
|
|
92
189
|
# ============================================================
|
|
93
190
|
# Derive architecture and backend from DEPLOYMENT_CONFIG
|
|
94
191
|
# ============================================================
|
|
@@ -293,7 +390,7 @@ echo ""
|
|
|
293
390
|
# ============================================================
|
|
294
391
|
|
|
295
392
|
compute_config_id() {
|
|
296
|
-
local input="${DEPLOYMENT_CONFIG}:${MODEL_NAME:-none}:${INSTANCE_TYPE}:${AWS_REGION}:${DEPLOYMENT_TARGET}"
|
|
393
|
+
local input="${DEPLOYMENT_CONFIG}:${MODEL_NAME:-none}:${INSTANCE_TYPE}:${AWS_REGION}:${DEPLOYMENT_TARGET}:ic${IC_COUNT:-1}:adapt${ADAPTER_COUNT:-0}"
|
|
297
394
|
# Use sha256sum (Linux) with fallback to shasum (macOS)
|
|
298
395
|
if command -v sha256sum &> /dev/null; then
|
|
299
396
|
echo -n "$input" | sha256sum | cut -c1-16
|
|
@@ -373,6 +470,9 @@ write_ci_record() {
|
|
|
373
470
|
"modelWeight": ${IC_MODEL_WEIGHT}
|
|
374
471
|
<% } %>
|
|
375
472
|
},
|
|
473
|
+
<% } %>
|
|
474
|
+
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
475
|
+
"icList": ${IC_LIST_JSON},
|
|
376
476
|
<% } %>
|
|
377
477
|
"parameters": ${PARAMETERS}
|
|
378
478
|
}
|
|
@@ -488,6 +588,9 @@ if [ "${JSON_OUTPUT}" = true ] || [ "${CI_MODE}" = true ]; then
|
|
|
488
588
|
"modelWeight": ${IC_MODEL_WEIGHT}
|
|
489
589
|
<% } %>
|
|
490
590
|
},
|
|
591
|
+
<% } %>
|
|
592
|
+
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
593
|
+
"icList": ${IC_LIST_JSON},
|
|
491
594
|
<% } %>
|
|
492
595
|
"parameters": ${PARAMETERS}
|
|
493
596
|
}
|
|
@@ -574,6 +677,13 @@ fi
|
|
|
574
677
|
# Pass parameters as JSON string
|
|
575
678
|
CMD_ARGS+=("--parameters" "${PARAMETERS}")
|
|
576
679
|
|
|
680
|
+
# Pass IC list as JSON string
|
|
681
|
+
<% if (deploymentTarget === 'realtime-inference') { %>
|
|
682
|
+
if [ "${IC_LIST_JSON}" != "[]" ]; then
|
|
683
|
+
CMD_ARGS+=("--ic-list" "${IC_LIST_JSON}")
|
|
684
|
+
fi
|
|
685
|
+
<% } %>
|
|
686
|
+
|
|
577
687
|
# Pass generator version from package.json if available
|
|
578
688
|
GENERATOR_VERSION=""
|
|
579
689
|
if command -v node &> /dev/null; then
|