@aws/ml-container-creator 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/bin/cli.js +5 -2
  2. package/config/bootstrap-stack.json +86 -7
  3. package/config/defaults.json +1 -1
  4. package/infra/ci-harness/buildspec.yml +60 -0
  5. package/package.json +3 -1
  6. package/servers/README.md +41 -1
  7. package/servers/instance-sizer/index.js +42 -2
  8. package/servers/instance-sizer/lib/instance-ranker.js +114 -10
  9. package/servers/instance-sizer/lib/quota-resolver.js +368 -0
  10. package/servers/instance-sizer/package.json +2 -0
  11. package/servers/lib/catalogs/instances.json +527 -12
  12. package/servers/lib/catalogs/model-servers.json +15 -15
  13. package/servers/lib/catalogs/model-sizes.json +27 -0
  14. package/servers/lib/catalogs/models.json +71 -0
  15. package/servers/lib/schemas/image-catalog.schema.json +9 -1
  16. package/src/app.js +109 -3
  17. package/src/lib/bootstrap-command-handler.js +96 -3
  18. package/src/lib/cli-handler.js +2 -2
  19. package/src/lib/config-manager.js +117 -1
  20. package/src/lib/deployment-entry-schema.js +16 -0
  21. package/src/lib/prompt-runner.js +270 -12
  22. package/src/lib/prompts.js +288 -6
  23. package/src/lib/registry-command-handler.js +12 -0
  24. package/src/lib/schema-sync.js +31 -0
  25. package/src/lib/template-manager.js +49 -1
  26. package/src/lib/validate-runner.js +125 -2
  27. package/templates/Dockerfile +22 -2
  28. package/templates/code/cuda_compat.sh +22 -0
  29. package/templates/code/serve +3 -0
  30. package/templates/code/serving.properties +14 -0
  31. package/templates/code/start_server.sh +3 -0
  32. package/templates/diffusors/Dockerfile +2 -1
  33. package/templates/diffusors/serve +3 -0
  34. package/templates/do/README.md +33 -0
  35. package/templates/do/adapter +1214 -0
  36. package/templates/do/adapters/.gitkeep +2 -0
  37. package/templates/do/add-ic +130 -0
  38. package/templates/do/benchmark +718 -0
  39. package/templates/do/clean +593 -17
  40. package/templates/do/config +49 -4
  41. package/templates/do/deploy +513 -362
  42. package/templates/do/ic/default.conf +32 -0
  43. package/templates/do/lib/endpoint-config.sh +216 -0
  44. package/templates/do/lib/inference-component.sh +167 -0
  45. package/templates/do/lib/secrets.sh +44 -0
  46. package/templates/do/lib/wait.sh +131 -0
  47. package/templates/do/logs +107 -27
  48. package/templates/do/optimize +528 -0
  49. package/templates/do/register +119 -2
  50. package/templates/do/status +337 -0
  51. package/templates/do/test +80 -28
  52. package/templates/triton/Dockerfile +5 -0
@@ -0,0 +1,528 @@
1
#!/bin/bash
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

# do/optimize — Run SageMaker AI Inference Recommendations to find optimal
# instance types and model configurations for your workload.
# Wraps CreateAIRecommendationJob / DescribeAIRecommendationJob.

# Fail fast: abort on command errors, unset variables, and failures anywhere
# in a pipeline (equivalent to the separate set -e / -u / -o pipefail calls).
set -euo pipefail

# ── Source project configuration ──────────────────────────────────────────────
# SCRIPT_DIR resolves to the do/ directory regardless of the caller's cwd.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/config"
source "${SCRIPT_DIR}/lib/wait.sh"
17
+
18
# ── Parse flags ───────────────────────────────────────────────────────────────
#   --goal <goal>       optimization goal (required; validated below)
#   --instances <list>  comma-separated instance types to evaluate
#   --force             always create a new recommendation job
GOAL=""
INSTANCES_ARG=""
FORCE=false

# Fail with a clear message when a value-taking flag is the last token.
# (Previously a bare `shift` past the end of $@ made `set -e` abort the
# script with no explanation.)
#   $1 - flag name (for the error message), $2 - current $# at the call site
_require_flag_value() {
  if [ "$2" -lt 2 ]; then
    echo "❌ $1 requires a value" >&2
    exit 1
  fi
}

while [ $# -gt 0 ]; do
  case "$1" in
    --goal)
      _require_flag_value "--goal" "$#"
      GOAL="$2"; shift 2 ;;
    --instances)
      _require_flag_value "--instances" "$#"
      INSTANCES_ARG="$2"; shift 2 ;;
    --force) FORCE=true; shift ;;
    --help|-h)
      echo "Usage: ./do/optimize --goal <cost|latency|throughput> [--instances type1,type2] [--force]"
      echo ""
      echo "Run SageMaker AI Inference Recommendations to find optimal"
      echo "instance types and model configurations for your workload."
      echo ""
      echo "Options:"
      echo " --goal <goal> Optimization goal: cost, latency, or throughput (required)"
      echo " --instances <list> Comma-separated instance types to evaluate"
      echo " Defaults to INSTANCE_POOLS entries or INSTANCE_TYPE from do/config"
      echo " --force Create a new recommendation job even if one exists"
      echo ""
      echo "Examples:"
      echo " ./do/optimize --goal throughput"
      echo " ./do/optimize --goal cost --instances ml.g6e.48xlarge,ml.p5.48xlarge"
      echo " ./do/optimize --goal latency --force"
      echo ""
      echo "Idempotency:"
      echo " If OPTIMIZE_JOB_NAME is set in do/config and the job is still running,"
      echo " re-running without --force will resume waiting for the existing job."
      echo ""
      echo "Prerequisites:"
      echo " • MODEL_NAME must be set in do/config (HuggingFace model ID or S3 path)"
      echo " • AWS credentials must be configured"
      exit 0
      ;;
    *) shift ;; # unknown tokens are silently ignored (original behavior)
  esac
done
56
+
57
# ── Validate goal ─────────────────────────────────────────────────────────────
# One case handles both "missing" and "invalid" goal values.
case "${GOAL}" in
  "")
    echo "❌ --goal is required. Choose one of: cost, latency, throughput"
    echo " Example: ./do/optimize --goal throughput"
    exit 1
    ;;
  cost|latency|throughput)
    : # valid — nothing to do
    ;;
  *)
    echo "❌ Invalid goal: ${GOAL}"
    echo " Valid goals: cost, latency, throughput"
    exit 1
    ;;
esac
72
+
73
# ── Verify AWS CLI v2 ─────────────────────────────────────────────────────────
# Capture the version banner once; `|| true` keeps `set -e` from aborting
# when the aws binary is missing so we can print guidance instead.
AWS_CLI_BANNER="$(aws --version 2>&1 || true)"
case "${AWS_CLI_BANNER}" in
  *"aws-cli/2"*)
    : # CLI v2 detected
    ;;
  *)
    echo "❌ AWS CLI v2 is required for inference recommendations."
    echo " Detected: $(printf '%s\n' "${AWS_CLI_BANNER}" | head -1)"
    echo ""
    echo " Install CLI v2: https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html"
    exit 1
    ;;
esac
81
+
82
# ── Resolve model name ────────────────────────────────────────────────────────
# MODEL_NAME comes from the sourced do/config; it is required, so bail out
# early with guidance when it is missing or empty.
if [ -z "${MODEL_NAME:-}" ]; then
  echo "❌ MODEL_NAME is not set in do/config"
  echo " Set MODEL_NAME to a HuggingFace model ID or S3 model path."
  exit 1
fi

# Banner: show the resolved run parameters, one per line.
printf '%s\n' \
  "🔍 Inference Recommendations" \
  " Project: ${PROJECT_NAME}" \
  " Model: ${MODEL_NAME}" \
  " Goal: ${GOAL}"
93
+
94
# ── Resolve instance types ────────────────────────────────────────────────────
# Priority: --instances flag > INSTANCE_POOLS > INSTANCE_TYPE
INSTANCE_TYPES=""

if [ -n "${INSTANCES_ARG}" ]; then
  # From --instances flag (comma-separated)
  INSTANCE_TYPES="${INSTANCES_ARG}"
  echo " Instances (from --instances): ${INSTANCE_TYPES}"
elif [ -n "${INSTANCE_POOLS:-}" ]; then
  # Extract the "InstanceType" values from the INSTANCE_POOLS JSON and join
  # them with commas. Uses POSIX [[:space:]] instead of \s: the \s escape in
  # grep -E / sed patterns is a GNU extension and silently matches nothing on
  # BSD/macOS tools. `|| true` tolerates JSON without any InstanceType keys.
  INSTANCE_TYPES=$(printf '%s' "${INSTANCE_POOLS}" \
    | grep -oE '"InstanceType"[[:space:]]*:[[:space:]]*"[^"]+"' \
    | sed -E 's/.*"([^"]*)"$/\1/' \
    | paste -sd ',' - || true)
  echo " Instances (from INSTANCE_POOLS): ${INSTANCE_TYPES}"
elif [ -n "${INSTANCE_TYPE:-}" ]; then
  INSTANCE_TYPES="${INSTANCE_TYPE}"
  echo " Instances (from INSTANCE_TYPE): ${INSTANCE_TYPES}"
else
  echo "❌ No instance types available."
  echo " Provide --instances flag, or set INSTANCE_POOLS or INSTANCE_TYPE in do/config."
  exit 1
fi
114
+
115
# ── Resolve workload parameters from benchmark config (if available) ──────────
# BENCHMARK_* values come from do/config when a benchmark was run; otherwise
# fall back to conservative defaults.
CONCURRENCY="${BENCHMARK_CONCURRENCY:-1}"
INPUT_TOKENS="${BENCHMARK_INPUT_TOKENS_MEAN:-256}"
OUTPUT_TOKENS="${BENCHMARK_OUTPUT_TOKENS_MEAN:-256}"

printf '%s\n' \
  " Concurrency: ${CONCURRENCY}" \
  " Input tokens: ${INPUT_TOKENS}" \
  " Output tokens: ${OUTPUT_TOKENS}" \
  ""
124
+
125
# ── Helper: update a variable in do/config ────────────────────────────────────
# Thin wrapper over _update_config_var (defined in the sourced do/config —
# TODO confirm) so call sites in this script read as optimize-specific.
#   $1 - variable name, $2 - new value
_update_optimize_var() {
  _update_config_var "$1" "$2"
}
129
+
130
# ── Idempotency: Check for existing recommendation job ────────────────────────
# If do/config already records OPTIMIZE_JOB_NAME (written by a previous run)
# and --force was not given, inspect that job rather than creating a new one.
RESUME_EXISTING=false

if [ "${FORCE}" = false ] && [ -n "${OPTIMIZE_JOB_NAME:-}" ]; then
  # Describe may fail if the job was deleted or never existed; treat that as
  # "no existing job" (empty status) and fall through to creation below.
  EXISTING_STATUS=$(aws sagemaker describe-ai-recommendation-job \
    --job-name "${OPTIMIZE_JOB_NAME}" \
    --region "${AWS_REGION}" \
    --query 'Status' \
    --output text 2>/dev/null) || EXISTING_STATUS=""

  case "${EXISTING_STATUS}" in
    IN_PROGRESS|PENDING|STARTING)
      # Still running: skip creation and fall through to the polling loop.
      echo "📊 Resuming existing recommendation job: ${OPTIMIZE_JOB_NAME}"
      echo " Status: ${EXISTING_STATUS}"
      echo " (use --force to start a new job instead)"
      echo ""
      RESUME_EXISTING=true
      ;;
    COMPLETED)
      # Already done: JOB_STATUS=COMPLETED also gates off the polling loop
      # below, so execution goes straight to displaying results.
      echo "📊 Previous recommendation job already completed: ${OPTIMIZE_JOB_NAME}"
      echo " (use --force to start a new job)"
      echo ""
      RESUME_EXISTING=true
      JOB_STATUS="COMPLETED"
      ;;
    FAILED|STOPPED)
      # A failed/stopped prior job is a hard stop: surface the reason and
      # require an explicit --force to start over.
      FAILURE_REASON=$(aws sagemaker describe-ai-recommendation-job \
        --job-name "${OPTIMIZE_JOB_NAME}" \
        --region "${AWS_REGION}" \
        --query 'FailureReason' \
        --output text 2>/dev/null) || FAILURE_REASON="unknown"
      echo "⚠️ Previous recommendation job ${EXISTING_STATUS}: ${OPTIMIZE_JOB_NAME}"
      # `--output text` renders a null FailureReason as the literal "None".
      if [ -n "${FAILURE_REASON}" ] && [ "${FAILURE_REASON}" != "None" ]; then
        echo " Reason: ${FAILURE_REASON}"
      fi
      echo " Use --force to start a new job."
      exit 1
      ;;
    *)
      # Job doesn't exist or can't be described — proceed with new job
      ;;
  esac
fi
173
+
174
# ── Create recommendation job ─────────────────────────────────────────────────

# Convert a comma-separated list into a JSON array of strings.
# Trims surrounding whitespace from each element with parameter expansion
# and skips empty elements. (The previous implementation piped each element
# through `xargs` to trim, which also mangles quotes and backslashes.)
#   $1 - comma-separated list; prints e.g. ["a","b"] to stdout
_csv_to_json_array() {
  local csv="$1" raw item out=""
  while IFS= read -r raw; do
    # strip leading, then trailing, whitespace
    item="${raw#"${raw%%[![:space:]]*}"}"
    item="${item%"${item##*[![:space:]]}"}"
    [ -n "${item}" ] || continue
    if [ -z "${out}" ]; then
      out="\"${item}\""
    else
      out="${out},\"${item}\""
    fi
  done <<< "${csv//,/$'\n'}"
  printf '[%s]' "${out}"
}

if [ "${RESUME_EXISTING}" = false ]; then
  OPTIMIZE_JOB_NAME="${PROJECT_NAME}-optimize-$(date +%Y%m%d-%H%M%S)"

  echo "🚀 Creating AI Recommendation Job: ${OPTIMIZE_JOB_NAME}"

  # Build instance type list as JSON array
  INSTANCE_TYPES_JSON=$(_csv_to_json_array "${INSTANCE_TYPES}")

  # Build job input config.
  # The model is specified as either a HuggingFace model ID or S3 path.
  # NOTE(review): JSON is assembled by string interpolation; a MODEL_NAME
  # containing quotes would produce invalid JSON — values come from do/config.
  if [[ "${MODEL_NAME}" == s3://* ]]; then
    MODEL_SOURCE_JSON="{\"S3DataSource\":{\"S3Uri\":\"${MODEL_NAME}\"}}"
  else
    MODEL_SOURCE_JSON="{\"ModelName\":\"${MODEL_NAME}\"}"
  fi

  # Build workload config (values resolved from benchmark config above)
  WORKLOAD_JSON="{\"Concurrency\":${CONCURRENCY},\"InputTokens\":${INPUT_TOKENS},\"OutputTokens\":${OUTPUT_TOKENS}}"

  # Build the full job specification
  JOB_INPUT="{\"ModelSource\":${MODEL_SOURCE_JSON},\"Workload\":${WORKLOAD_JSON},\"InstanceTypes\":${INSTANCE_TYPES_JSON},\"OptimizationGoal\":\"${GOAL}\"}"

  if ! aws sagemaker create-ai-recommendation-job \
      --job-name "${OPTIMIZE_JOB_NAME}" \
      --role-arn "${ROLE_ARN}" \
      --input-config "${JOB_INPUT}" \
      --region "${AWS_REGION}"; then
    echo "❌ Failed to create AI Recommendation Job"
    echo " Check that:"
    echo " • The execution role has sagemaker:CreateAIRecommendationJob permission"
    echo " • The model name or S3 path is valid: ${MODEL_NAME}"
    echo " • The instance types are valid: ${INSTANCE_TYPES}"
    echo " • The API is available in region: ${AWS_REGION}"
    exit 1
  fi

  echo "✅ Recommendation job created: ${OPTIMIZE_JOB_NAME}"

  # Save job name to do/config for idempotency on re-run
  _update_optimize_var "OPTIMIZE_JOB_NAME" "${OPTIMIZE_JOB_NAME}"
  echo ""
fi
230
+
231
# ── Poll for completion ───────────────────────────────────────────────────────
POLL_INTERVAL=30
MAX_POLL_ATTEMPTS=120 # 60 minutes max (120 * 30s)

# Skip polling when the idempotency check above already resolved a terminal
# status (a resumed COMPLETED job). Otherwise JOB_STATUS is unset/empty here.
if [ "${JOB_STATUS:-}" != "COMPLETED" ] && [ "${JOB_STATUS:-}" != "FAILED" ] && [ "${JOB_STATUS:-}" != "STOPPED" ]; then

  echo "⏳ Waiting for recommendation job to complete..."
  echo " Polling every ${POLL_INTERVAL}s (max ${MAX_POLL_ATTEMPTS} attempts = 60 min)"
  echo ""

  POLL_COUNT=0
  JOB_STATUS=""

  while [ ${POLL_COUNT} -lt ${MAX_POLL_ATTEMPTS} ]; do
    # A describe failure mid-poll (expired credentials, network) aborts the
    # script; the job keeps running server-side and can be resumed later.
    JOB_STATUS=$(aws sagemaker describe-ai-recommendation-job \
      --job-name "${OPTIMIZE_JOB_NAME}" \
      --region "${AWS_REGION}" \
      --query 'Status' \
      --output text 2>/dev/null) || {
      echo "⚠️ Failed to describe recommendation job (credentials may have expired)"
      echo " Re-run to check status:"
      echo " aws sagemaker describe-ai-recommendation-job --job-name ${OPTIMIZE_JOB_NAME} --region ${AWS_REGION}"
      exit 1
    }

    case "${JOB_STATUS}" in
      COMPLETED)
        echo "✅ Recommendation job completed!"
        break
        ;;
      FAILED)
        # Terminal; the failure reason is fetched in the results section below.
        echo "❌ Recommendation job failed"
        break
        ;;
      STOPPED)
        echo "⚠️ Recommendation job was stopped"
        break
        ;;
      *)
        # Any non-terminal status: log progress, wait, and try again.
        POLL_COUNT=$((POLL_COUNT + 1))
        ELAPSED=$((POLL_COUNT * POLL_INTERVAL))
        echo " $(date +%H:%M:%S) Status: ${JOB_STATUS} (${ELAPSED}s elapsed)"
        sleep ${POLL_INTERVAL}
        ;;
    esac
  done

  # Loop exhausted without reaching a terminal status.
  if [ ${POLL_COUNT} -ge ${MAX_POLL_ATTEMPTS} ]; then
    echo ""
    echo "⚠️ Recommendation job timed out after 60 minutes (status: ${JOB_STATUS})"
    echo " The job may still be running. Re-run ./do/optimize to resume waiting."
    exit 1
  fi

fi # end of polling conditional

echo ""
288
+
289
# ── Display results ───────────────────────────────────────────────────────────
if [ "${JOB_STATUS}" = "COMPLETED" ]; then
  echo "📊 Fetching recommendation results..."

  # Get the full job description with results
  JOB_DESCRIPTION=$(aws sagemaker describe-ai-recommendation-job \
    --job-name "${OPTIMIZE_JOB_NAME}" \
    --region "${AWS_REGION}" \
    --output json 2>/dev/null) || {
    echo "❌ Failed to fetch recommendation results"
    exit 1
  }

  # Display results using python for JSON parsing.
  # NOTE(review): the python program below is a double-quoted shell string, so
  # ${OPTIMIZE_JOB_NAME}, ${GOAL}, ${MODEL_NAME} and ${SCRIPT_DIR} are expanded
  # by the shell before python runs; values containing quotes or braces would
  # break the generated python source — confirm these are constrained upstream.
  if command -v python3 &>/dev/null; then
    echo "${JOB_DESCRIPTION}" | python3 -c "
import json, sys

data = json.load(sys.stdin)
# Tolerate either result key (the response shape is probed, not assumed).
results = data.get('Results', data.get('InferenceRecommendations', []))

if not results:
    print(' No recommendations returned.')
    sys.exit(0)

# If results is a dict with a list inside, extract it
if isinstance(results, dict):
    results = results.get('Recommendations', results.get('InferenceRecommendations', []))

print('╔══════════════════════════════════════════════════════════════════════════╗')
print('║ SageMaker AI Inference Recommendations ║')
print('╠══════════════════════════════════════════════════════════════════════════╣')
print(f'║ Job: ${OPTIMIZE_JOB_NAME}')
print(f'║ Goal: ${GOAL}')
print(f'║ Model: ${MODEL_NAME}')
print('╠══════════════════════════════════════════════════════════════════════════╣')

# Each metric is read with a primary and a fallback key; 'N/A' when absent.
for i, rec in enumerate(results, 1):
    instance_type = rec.get('InstanceType', rec.get('EndpointConfiguration', {}).get('InstanceType', 'N/A'))
    model_config = rec.get('ModelConfiguration', {})
    model_package_arn = model_config.get('ModelPackageArn', rec.get('ModelPackageArn', 'N/A'))
    inference_spec = model_config.get('InferenceSpecificationName', rec.get('InferenceSpecificationName', 'N/A'))

    metrics = rec.get('Metrics', rec.get('RecommendationMetrics', {}))
    ttft = metrics.get('TimeToFirstToken', metrics.get('TTFT', 'N/A'))
    itl = metrics.get('InterTokenLatency', metrics.get('ITL', 'N/A'))
    throughput = metrics.get('Throughput', metrics.get('MaxInvocations', 'N/A'))
    cost = metrics.get('CostPerHour', metrics.get('CostPerInference', 'N/A'))

    rank_marker = ' ← TOP' if i == 1 else ''
    print(f'║')
    print(f'║ #{i}{rank_marker}')
    print(f'║ Instance Type: {instance_type}')
    print(f'║ ModelPackageArn: {model_package_arn}')
    print(f'║ InferenceSpec: {inference_spec}')
    print(f'║ ────────────────────────────────────────')
    print(f'║ TTFT (ms): {ttft}')
    print(f'║ ITL (ms): {itl}')
    print(f'║ Throughput: {throughput}')
    print(f'║ Cost: {cost}')

print('║')
print('╚══════════════════════════════════════════════════════════════════════════╝')

# Output structured data for the interactive choices below
# Write results to a temp file for bash to read
# NOTE(review): tempfile is imported but unused.
import tempfile, os
results_file = os.path.join('${SCRIPT_DIR}', '.optimize-results.json')
with open(results_file, 'w') as f:
    json.dump(results, f)
"
  else
    echo " (python3 not available — showing raw JSON)"
    echo "${JOB_DESCRIPTION}" | head -100
  fi

  echo ""
366
+
367
+ # ── Interactive choices ───────────────────────────────────────────────────
368
+ RESULTS_FILE="${SCRIPT_DIR}/.optimize-results.json"
369
+
370
+ if [ -f "${RESULTS_FILE}" ]; then
371
+ echo "What would you like to do with these results?"
372
+ echo ""
373
+ echo " 1) Deploy top recommendation — update do/config with optimized model"
374
+ echo " 2) Set up instance pools — build INSTANCE_POOLS with all results"
375
+ echo " 3) Save for later — store ModelPackageArn in do/config"
376
+ echo ""
377
+ printf "Choose [1/2/3]: "
378
+ read -r CHOICE
379
+
380
+ case "${CHOICE}" in
381
+ 1)
382
+ echo ""
383
+ echo "📦 Deploying top recommendation..."
384
+ # Extract top result's ModelPackageArn and InferenceSpecificationName
385
+ TOP_MODEL_PACKAGE_ARN=$(python3 -c "
386
+ import json
387
+ with open('${RESULTS_FILE}') as f:
388
+ results = json.load(f)
389
+ if results:
390
+ r = results[0]
391
+ mc = r.get('ModelConfiguration', {})
392
+ print(mc.get('ModelPackageArn', r.get('ModelPackageArn', '')))
393
+ " 2>/dev/null) || TOP_MODEL_PACKAGE_ARN=""
394
+
395
+ TOP_INFERENCE_SPEC=$(python3 -c "
396
+ import json
397
+ with open('${RESULTS_FILE}') as f:
398
+ results = json.load(f)
399
+ if results:
400
+ r = results[0]
401
+ mc = r.get('ModelConfiguration', {})
402
+ print(mc.get('InferenceSpecificationName', r.get('InferenceSpecificationName', '')))
403
+ " 2>/dev/null) || TOP_INFERENCE_SPEC=""
404
+
405
+ TOP_INSTANCE_TYPE=$(python3 -c "
406
+ import json
407
+ with open('${RESULTS_FILE}') as f:
408
+ results = json.load(f)
409
+ if results:
410
+ r = results[0]
411
+ print(r.get('InstanceType', r.get('EndpointConfiguration', {}).get('InstanceType', '')))
412
+ " 2>/dev/null) || TOP_INSTANCE_TYPE=""
413
+
414
+ if [ -n "${TOP_MODEL_PACKAGE_ARN}" ]; then
415
+ _update_optimize_var "OPTIMIZE_MODEL_PACKAGE_ARN" "${TOP_MODEL_PACKAGE_ARN}"
416
+ echo " ✅ OPTIMIZE_MODEL_PACKAGE_ARN set in do/config"
417
+ fi
418
+ if [ -n "${TOP_INFERENCE_SPEC}" ]; then
419
+ _update_optimize_var "OPTIMIZE_INFERENCE_SPEC" "${TOP_INFERENCE_SPEC}"
420
+ echo " ✅ OPTIMIZE_INFERENCE_SPEC set in do/config"
421
+ fi
422
+ if [ -n "${TOP_INSTANCE_TYPE}" ]; then
423
+ _update_optimize_var "INSTANCE_TYPE" "${TOP_INSTANCE_TYPE}"
424
+ echo " ✅ INSTANCE_TYPE updated to: ${TOP_INSTANCE_TYPE}"
425
+ fi
426
+ echo ""
427
+ echo "✅ Top recommendation applied to do/config"
428
+ echo " Run ./do/deploy to deploy with the optimized configuration."
429
+ ;;
430
+ 2)
431
+ echo ""
432
+ echo "📦 Setting up instance pools from all results..."
433
+ # Build INSTANCE_POOLS JSON from results
434
+ POOLS_JSON=$(python3 -c "
435
+ import json
436
+ with open('${RESULTS_FILE}') as f:
437
+ results = json.load(f)
438
+ pools = []
439
+ for i, r in enumerate(results, 1):
440
+ instance_type = r.get('InstanceType', r.get('EndpointConfiguration', {}).get('InstanceType', ''))
441
+ if not instance_type:
442
+ continue
443
+ entry = {'InstanceType': instance_type, 'Priority': i}
444
+ mc = r.get('ModelConfiguration', {})
445
+ model_name = mc.get('ModelPackageArn', r.get('ModelPackageArn', ''))
446
+ if model_name:
447
+ entry['ModelName'] = model_name
448
+ pools.append(entry)
449
+ print(json.dumps(pools))
450
+ " 2>/dev/null) || POOLS_JSON=""
451
+
452
+ if [ -n "${POOLS_JSON}" ] && [ "${POOLS_JSON}" != "[]" ]; then
453
+ _update_optimize_var "INSTANCE_POOLS" "${POOLS_JSON}"
454
+ echo " ✅ INSTANCE_POOLS set in do/config"
455
+ echo " Pools: ${POOLS_JSON}"
456
+ echo ""
457
+ echo "✅ Instance pools configured from recommendation results."
458
+ echo " Each pool entry includes ModelName for ModelNameOverride support."
459
+ echo " Run ./do/deploy to deploy with heterogeneous instance pools."
460
+ else
461
+ echo " ⚠️ Could not build instance pools from results"
462
+ fi
463
+ ;;
464
+ 3)
465
+ echo ""
466
+ echo "📦 Saving results for later..."
467
+ # Store the top ModelPackageArn
468
+ SAVE_MODEL_PACKAGE_ARN=$(python3 -c "
469
+ import json
470
+ with open('${RESULTS_FILE}') as f:
471
+ results = json.load(f)
472
+ if results:
473
+ r = results[0]
474
+ mc = r.get('ModelConfiguration', {})
475
+ print(mc.get('ModelPackageArn', r.get('ModelPackageArn', '')))
476
+ " 2>/dev/null) || SAVE_MODEL_PACKAGE_ARN=""
477
+
478
+ if [ -n "${SAVE_MODEL_PACKAGE_ARN}" ]; then
479
+ _update_optimize_var "OPTIMIZE_MODEL_PACKAGE_ARN" "${SAVE_MODEL_PACKAGE_ARN}"
480
+ echo " ✅ OPTIMIZE_MODEL_PACKAGE_ARN saved to do/config"
481
+ fi
482
+ echo ""
483
+ echo "✅ Results saved. You can apply them later by editing do/config."
484
+ ;;
485
+ *)
486
+ echo " No action taken. Results are available in the job output."
487
+ ;;
488
+ esac
489
+
490
+ # Clean up temp results file
491
+ rm -f "${RESULTS_FILE}"
492
+ fi
493
+
494
elif [ "${JOB_STATUS}" = "FAILED" ]; then
  # Try to pull FailureReason out of JOB_DESCRIPTION first.
  # NOTE(review): JOB_DESCRIPTION is only assigned in the COMPLETED branch
  # above, so on this path it is normally empty; the parse then yields
  # "unknown" and the describe call below is what actually fetches the reason.
  FAILURE_REASON=$(echo "${JOB_DESCRIPTION:-}" | python3 -c "
import json, sys
try:
    data = json.load(sys.stdin)
    print(data.get('FailureReason', 'unknown'))
except:
    print('unknown')
" 2>/dev/null) || FAILURE_REASON="unknown"

  # Fallback: ask the API directly for the failure reason.
  if [ "${FAILURE_REASON}" = "unknown" ]; then
    FAILURE_REASON=$(aws sagemaker describe-ai-recommendation-job \
      --job-name "${OPTIMIZE_JOB_NAME}" \
      --region "${AWS_REGION}" \
      --query 'FailureReason' \
      --output text 2>/dev/null) || FAILURE_REASON="unknown"
  fi

  echo "❌ Recommendation job failed"
  echo " Reason: ${FAILURE_REASON}"
  echo ""
  echo " Debug:"
  echo " aws sagemaker describe-ai-recommendation-job --job-name ${OPTIMIZE_JOB_NAME} --region ${AWS_REGION}"

elif [ "${JOB_STATUS}" = "STOPPED" ]; then
  echo "⚠️ Recommendation job was stopped before completion"
  echo " No results available."
fi

# Final summary printed on every terminal path.
echo ""
echo "📋 Summary:"
echo " Recommendation Job: ${OPTIMIZE_JOB_NAME}"
echo " Status: ${JOB_STATUS}"
echo " Goal: ${GOAL}"
echo ""