@aws/ml-container-creator 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/bin/cli.js +5 -2
  2. package/config/bootstrap-stack.json +86 -7
  3. package/config/defaults.json +1 -1
  4. package/infra/ci-harness/buildspec.yml +60 -0
  5. package/package.json +3 -1
  6. package/servers/README.md +41 -1
  7. package/servers/instance-sizer/index.js +42 -2
  8. package/servers/instance-sizer/lib/instance-ranker.js +114 -10
  9. package/servers/instance-sizer/lib/quota-resolver.js +368 -0
  10. package/servers/instance-sizer/package.json +2 -0
  11. package/servers/lib/catalogs/instances.json +527 -12
  12. package/servers/lib/catalogs/model-servers.json +15 -15
  13. package/servers/lib/catalogs/model-sizes.json +27 -0
  14. package/servers/lib/catalogs/models.json +71 -0
  15. package/servers/lib/schemas/image-catalog.schema.json +9 -1
  16. package/src/app.js +109 -3
  17. package/src/lib/bootstrap-command-handler.js +96 -3
  18. package/src/lib/cli-handler.js +2 -2
  19. package/src/lib/config-manager.js +117 -1
  20. package/src/lib/deployment-entry-schema.js +16 -0
  21. package/src/lib/prompt-runner.js +270 -12
  22. package/src/lib/prompts.js +288 -6
  23. package/src/lib/registry-command-handler.js +12 -0
  24. package/src/lib/schema-sync.js +31 -0
  25. package/src/lib/template-manager.js +49 -1
  26. package/src/lib/validate-runner.js +125 -2
  27. package/templates/Dockerfile +22 -2
  28. package/templates/code/cuda_compat.sh +22 -0
  29. package/templates/code/serve +3 -0
  30. package/templates/code/serving.properties +14 -0
  31. package/templates/code/start_server.sh +3 -0
  32. package/templates/diffusors/Dockerfile +2 -1
  33. package/templates/diffusors/serve +3 -0
  34. package/templates/do/README.md +33 -0
  35. package/templates/do/adapter +1214 -0
  36. package/templates/do/adapters/.gitkeep +2 -0
  37. package/templates/do/add-ic +130 -0
  38. package/templates/do/benchmark +718 -0
  39. package/templates/do/clean +593 -17
  40. package/templates/do/config +49 -4
  41. package/templates/do/deploy +513 -362
  42. package/templates/do/ic/default.conf +32 -0
  43. package/templates/do/lib/endpoint-config.sh +216 -0
  44. package/templates/do/lib/inference-component.sh +167 -0
  45. package/templates/do/lib/secrets.sh +44 -0
  46. package/templates/do/lib/wait.sh +131 -0
  47. package/templates/do/logs +107 -27
  48. package/templates/do/optimize +528 -0
  49. package/templates/do/register +119 -2
  50. package/templates/do/status +337 -0
  51. package/templates/do/test +80 -28
  52. package/templates/triton/Dockerfile +5 -0
package/templates/do/benchmark (new file)
@@ -0,0 +1,718 @@
+ #!/bin/bash
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ # SPDX-License-Identifier: Apache-2.0
+
+ # do/benchmark — Run SageMaker AI Benchmark against deployed endpoint
+ # Uses NVIDIA AIPerf via the SageMaker AI Benchmarking service to measure
+ # LLM endpoint performance: throughput, latency, TTFT, and ITL.
+
+ set -e
+ set -u
+ set -o pipefail
+
+ # ── Source project configuration ──────────────────────────────────────────────
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+ source "${SCRIPT_DIR}/config"
+
+ # ── Parse flags ───────────────────────────────────────────────────────────────
+ CLEAN_AFTER=false
+ FORCE=false
+ IC_ARG=""
+ ADAPTER_ARG=""
+ while [ $# -gt 0 ]; do
+     case "$1" in
+         --clean) CLEAN_AFTER=true; shift ;;
+         --force) FORCE=true; shift ;;
+         --ic) shift; IC_ARG="${1:-}"; shift ;;
+         --adapter) shift; ADAPTER_ARG="${1:-}"; shift ;;
+         --help|-h)
+             echo "Usage: ./do/benchmark [--ic <name>] [--adapter <name>] [--force] [--clean]"
+             echo ""
+             echo "Run SageMaker AI Benchmark against the deployed endpoint."
+             echo ""
+             echo "Options:"
+             echo "  --ic <name>       Benchmark a specific inference component"
+             echo "  --adapter <name>  Benchmark a specific LoRA adapter IC"
+             echo "  --force           Create a new benchmark job even if one is already running"
+             echo "  --clean           Delete workload config and benchmark job after displaying results"
+             echo ""
+             echo "IC resolution:"
+             echo "  --adapter <name>  Use ADAPTER_IC_NAME from do/adapters/<name>.conf"
+             echo "  --ic <name>       Use IC_DEPLOYED_NAME from do/ic/<name>.conf"
+             echo "  (no flag)         Use first IC in do/ic/ alphabetically, or legacy config"
+             echo ""
+             echo "Idempotency:"
+             echo "  If a benchmark job is already in progress, re-running without --force"
+             echo "  will resume waiting for the existing job and display its results."
+             echo ""
+             echo "Prerequisites:"
+             echo "  • Endpoint must be deployed and InService (run ./do/deploy first)"
+             echo "  • AWS credentials must be configured"
+             exit 0
+             ;;
+         *) shift ;;
+     esac
+ done
+
+ # ── Verify AWS CLI v2 ─────────────────────────────────────────────────────────
+ if ! aws --version 2>&1 | grep -q "aws-cli/2"; then
+     echo "❌ AWS CLI v2 is required for benchmarking."
+     echo "   The SageMaker AI Benchmarking API is only available in CLI v2."
+     echo "   Detected: $(aws --version 2>&1 | head -1)"
+     echo ""
+     echo "   Install CLI v2: https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html"
+     exit 1
+ fi
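Note: the check above only proves the CLI major version. Whether a given v2 build actually ships the AI Benchmarking subcommands can be probed by asking for their help text, since the CLI exits non-zero on an unknown command. A hedged sketch, not part of the shipped script:

    # Probe for the Benchmarking subcommands (assumes a recent CLI v2 build)
    if ! aws sagemaker create-ai-benchmark-job help > /dev/null 2>&1; then
        echo "CLI v2 found, but it lacks the AI Benchmarking commands; upgrade awscli"
    fi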
+
+ # ── Resolve inference component name ──────────────────────────────────────────
+ # Resolution precedence: --adapter <name>, --ic <name>, first in do/ic/, or legacy config
+ IC_NAME=""
+ if [ -n "${ADAPTER_ARG}" ]; then
+     # Adapter name provided via --adapter flag — look up adapter IC
+     ADAPTER_CONF="${SCRIPT_DIR}/adapters/${ADAPTER_ARG}.conf"
+     if [ ! -f "${ADAPTER_CONF}" ]; then
+         echo "❌ Adapter config not found: do/adapters/${ADAPTER_ARG}.conf"
+         echo "   Available adapters:"
+         if [ -d "${SCRIPT_DIR}/adapters" ]; then
+             for conf in "${SCRIPT_DIR}"/adapters/*.conf; do
+                 [ -f "${conf}" ] || continue
+                 echo "   • $(basename "${conf}" .conf)"
+             done
+         else
+             echo "   (none)"
+         fi
+         exit 1
+     fi
+     ADAPTER_IC_NAME=""
+     source "${ADAPTER_CONF}"
+     if [ -z "${ADAPTER_IC_NAME}" ]; then
+         echo "❌ Adapter '${ADAPTER_ARG}' conf is missing ADAPTER_IC_NAME."
+         exit 1
+     fi
+     IC_NAME="${ADAPTER_IC_NAME}"
+ elif [ -n "${IC_ARG}" ]; then
+     # Explicit IC name provided via --ic flag
+     IC_CONF="${SCRIPT_DIR}/ic/${IC_ARG}.conf"
+     if [ ! -f "${IC_CONF}" ]; then
+         echo "❌ IC config not found: do/ic/${IC_ARG}.conf"
+         exit 1
+     fi
+     IC_DEPLOYED_NAME=""
+     source "${IC_CONF}"
+     if [ -z "${IC_DEPLOYED_NAME}" ]; then
+         echo "❌ IC '${IC_ARG}' has not been deployed yet. Run ./do/deploy --ic ${IC_ARG} first."
+         exit 1
+     fi
+     IC_NAME="${IC_DEPLOYED_NAME}"
+ elif [ -d "${SCRIPT_DIR}/ic" ]; then
+     # No --ic argument, but do/ic/ exists — use first IC alphabetically
+     for conf in "${SCRIPT_DIR}"/ic/*.conf; do
+         [ -f "${conf}" ] || continue
+         IC_DEPLOYED_NAME=""
+         source "${conf}"
+         if [ -n "${IC_DEPLOYED_NAME}" ]; then
+             IC_NAME="${IC_DEPLOYED_NAME}"
+             break
+         fi
+     done
+     if [ -z "${IC_NAME}" ]; then
+         echo "❌ No ICs deployed. Run ./do/deploy first."
+         exit 1
+     fi
+ else
+     # Legacy: no do/ic/ directory, use INFERENCE_COMPONENT_NAME from do/config
+     IC_NAME="${INFERENCE_COMPONENT_NAME:-}"
+ fi
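For reference, the .conf files this resolution step sources are flat KEY="value" shell fragments written by ./do/deploy and the adapter workflow. Hypothetical examples (names invented for illustration):

    # do/ic/default.conf — written by ./do/deploy --ic default
    IC_DEPLOYED_NAME="my-llm-project-ic-default"

    # do/adapters/summarize.conf — written by the adapter workflow
    ADAPTER_IC_NAME="my-llm-project-ic-adapter-summarize"

Because the files are sourced, each variable is deliberately reset to "" beforehand so a conf that omits the key cannot inherit a stale value from an earlier iteration.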
+
+ # ── Helper: update a variable in do/config ────────────────────────────────────
+ _update_benchmark_var() {
+     local var_name="$1"
+     local var_value="$2"
+     local config_file="${SCRIPT_DIR}/config"
+
+     if grep -q "^export ${var_name}=" "${config_file}" 2>/dev/null; then
+         sed -i.bak "s|^export ${var_name}=.*|export ${var_name}=\"${var_value}\"|" "${config_file}"
+         rm -f "${config_file}.bak"
+     else
+         echo "export ${var_name}=\"${var_value}\"" >> "${config_file}"
+     fi
+ }
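The helper upserts an export line in do/config, using sed -i.bak so the same invocation works with both GNU and BSD/macOS sed (the latter requires an explicit backup suffix). Usage, with an illustrative value:

    # First call appends the line; later calls rewrite it in place
    _update_benchmark_var "BENCHMARK_JOB_NAME" "demo-benchmark-20250101-120000"
    grep '^export BENCHMARK_JOB_NAME=' do/config
    # export BENCHMARK_JOB_NAME="demo-benchmark-20250101-120000"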
+
+ # ── Idempotency: Check for existing benchmark job ─────────────────────────────
+ # If BENCHMARK_JOB_NAME is set in do/config and the job is still running,
+ # resume waiting for it instead of creating a new one (unless --force is used).
+ RESUME_EXISTING=false
+
+ if [ "${FORCE}" = false ] && [ -n "${BENCHMARK_JOB_NAME:-}" ]; then
+     EXISTING_STATUS=$(aws sagemaker describe-ai-benchmark-job \
+         --ai-benchmark-job-name "${BENCHMARK_JOB_NAME}" \
+         --region "${AWS_REGION}" \
+         --query 'AIBenchmarkJobStatus' \
+         --output text 2>/dev/null) || EXISTING_STATUS=""
+
+     case "${EXISTING_STATUS}" in
+         InProgress|Starting|Pending)
+             echo "📊 Resuming existing benchmark job: ${BENCHMARK_JOB_NAME}"
+             echo "   Status: ${EXISTING_STATUS}"
+             echo "   (use --force to start a new benchmark instead)"
+             echo ""
+             RESUME_EXISTING=true
+             ;;
+         Completed)
+             echo "📊 Previous benchmark job already completed: ${BENCHMARK_JOB_NAME}"
+             echo "   (use --force to start a new benchmark)"
+             echo ""
+             RESUME_EXISTING=true
+             JOB_STATUS="Completed"
+             ;;
+         Failed|Stopped)
+             FAILURE_REASON=$(aws sagemaker describe-ai-benchmark-job \
+                 --ai-benchmark-job-name "${BENCHMARK_JOB_NAME}" \
+                 --region "${AWS_REGION}" \
+                 --query 'FailureReason' \
+                 --output text 2>/dev/null) || FAILURE_REASON="unknown"
+             echo "⚠️ Previous benchmark job ${EXISTING_STATUS}: ${BENCHMARK_JOB_NAME}"
+             if [ "${EXISTING_STATUS}" = "Failed" ] && [ -n "${FAILURE_REASON}" ] && [ "${FAILURE_REASON}" != "None" ]; then
+                 echo "   Reason: ${FAILURE_REASON}"
+             fi
+             echo "   Use --force to start a new benchmark."
+             exit 1
+             ;;
+         *)
+             # Job doesn't exist or can't be described — proceed with new job
+             ;;
+     esac
+ fi
+
+ # ── Configuration ─────────────────────────────────────────────────────────────
+ WORKLOAD_CONFIG_NAME="${PROJECT_NAME}-benchmark-config"
+ if [ "${RESUME_EXISTING}" = false ]; then
+     BENCHMARK_JOB_NAME="${PROJECT_NAME}-benchmark-$(date +%Y%m%d-%H%M%S)"
+ fi
+ POLL_INTERVAL=30
+ MAX_POLL_ATTEMPTS=60 # 30 minutes max (60 * 30s)
+
+ echo "📊 SageMaker AI Benchmark"
+ echo "   Project:              ${PROJECT_NAME}"
+ echo "   Endpoint:             ${ENDPOINT_NAME:-not set}"
+ echo "   Inference Component:  ${IC_NAME:-not set}"
+ echo "   Concurrency:          ${BENCHMARK_CONCURRENCY}"
+ echo "   Input tokens (mean):  ${BENCHMARK_INPUT_TOKENS_MEAN}"
+ echo "   Output tokens (mean): ${BENCHMARK_OUTPUT_TOKENS_MEAN}"
+ echo "   Streaming:            ${BENCHMARK_STREAMING}"
+ if [ -n "${BENCHMARK_REQUEST_COUNT:-}" ]; then
+     echo "   Request count:        ${BENCHMARK_REQUEST_COUNT}"
+ fi
+ echo "   S3 output:            ${BENCHMARK_S3_OUTPUT_PATH}"
+ echo ""
+
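The banner implies the knobs the script expects do/config to export. A sketch of a populated config, using made-up values (every variable name below is one this script reads):

    export PROJECT_NAME="my-llm-project"
    export AWS_REGION="us-west-2"
    export ENDPOINT_NAME="my-llm-project-endpoint"
    export ROLE_ARN="arn:aws:iam::111122223333:role/my-llm-project-role"
    export MODEL_NAME="meta-llama/Llama-3.1-8B-Instruct"
    export BENCHMARK_CONCURRENCY=8
    export BENCHMARK_INPUT_TOKENS_MEAN=512
    export BENCHMARK_OUTPUT_TOKENS_MEAN=256
    export BENCHMARK_STREAMING=true
    export BENCHMARK_S3_OUTPUT_PATH="s3://my-llm-project-benchmarks/results"
    # export BENCHMARK_REQUEST_COUNT=200   # optional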
+ # ── Pre-flight check: Verify endpoint is InService ────────────────────────────
+ if [ "${RESUME_EXISTING}" = false ]; then
+
+ echo "🔍 Pre-flight: Verifying endpoint status..."
+
+ if [ -z "${ENDPOINT_NAME:-}" ]; then
+     echo "❌ ENDPOINT_NAME is not set in do/config"
+     echo "   Deploy your endpoint first: ./do/deploy"
+     exit 1
+ fi
+
+ ENDPOINT_STATUS=$(aws sagemaker describe-endpoint \
+     --endpoint-name "${ENDPOINT_NAME}" \
+     --region "${AWS_REGION}" \
+     --query 'EndpointStatus' \
+     --output text 2>/dev/null) || {
+         echo "❌ Failed to describe endpoint: ${ENDPOINT_NAME}"
+         echo "   Check that the endpoint exists and your AWS credentials are valid."
+         exit 1
+     }
+
+ if [ "${ENDPOINT_STATUS}" != "InService" ]; then
+     echo "❌ Endpoint is not InService (current status: ${ENDPOINT_STATUS})"
+     echo "   The endpoint must be InService before running a benchmark."
+     echo "   Check status: aws sagemaker describe-endpoint --endpoint-name ${ENDPOINT_NAME} --region ${AWS_REGION}"
+     exit 1
+ fi
+
+ echo "✅ Endpoint is InService: ${ENDPOINT_NAME}"
+
+ # ── Pre-flight check: Ensure S3 output bucket exists ──────────────────────────
+ echo "🔍 Pre-flight: Checking S3 output bucket..."
+
+ BENCHMARK_S3_BUCKET=$(echo "${BENCHMARK_S3_OUTPUT_PATH}" | sed 's|s3://||' | cut -d'/' -f1)
+
+ if ! aws s3api head-bucket --bucket "${BENCHMARK_S3_BUCKET}" --region "${AWS_REGION}" 2>/dev/null; then
+     echo "📦 Creating S3 bucket: ${BENCHMARK_S3_BUCKET}"
+     if [ "${AWS_REGION}" = "us-east-1" ]; then
+         if ! aws s3api create-bucket \
+             --bucket "${BENCHMARK_S3_BUCKET}" \
+             --region "${AWS_REGION}"; then
+             echo "❌ Failed to create S3 bucket: ${BENCHMARK_S3_BUCKET}"
+             exit 1
+         fi
+     else
+         if ! aws s3api create-bucket \
+             --bucket "${BENCHMARK_S3_BUCKET}" \
+             --region "${AWS_REGION}" \
+             --create-bucket-configuration LocationConstraint="${AWS_REGION}"; then
+             echo "❌ Failed to create S3 bucket: ${BENCHMARK_S3_BUCKET}"
+             exit 1
+         fi
+     fi
+     echo "✅ S3 bucket created: ${BENCHMARK_S3_BUCKET}"
+ else
+     echo "✅ S3 bucket exists: ${BENCHMARK_S3_BUCKET}"
+ fi
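Two asides on the bucket logic above. The us-east-1 branch exists because S3's CreateBucket rejects an explicit LocationConstraint of us-east-1; the configuration must simply be omitted in that region. And the sed/cut pipeline that extracts the bucket name could equally be done with parameter expansion, avoiding two subprocesses:

    # Equivalent bucket extraction in pure bash (sketch)
    BUCKET="${BENCHMARK_S3_OUTPUT_PATH#s3://}"   # strip the scheme
    BUCKET="${BUCKET%%/*}"                       # drop everything after the bucket name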
+
+ # ── Pre-flight check: Ensure Secrets Manager secret for HF token ──────────────
+ # The benchmarking service requires a Secrets Manager ARN for tokenizer access.
+ # If HF_TOKEN is available (plaintext or resolved from ARN), store it in Secrets Manager.
+ SECRET_ARN=""
+
+ if [ -n "${HF_TOKEN_ARN:-}" ]; then
+     # Already using a Secrets Manager ARN — use it directly
+     SECRET_ARN="${HF_TOKEN_ARN}"
+     echo "✅ Using existing Secrets Manager ARN for HF token: ${SECRET_ARN}"
+ elif [ -n "${HF_TOKEN:-}" ]; then
+     # Plaintext HF token provided — store in Secrets Manager for the benchmark service
+     SECRET_NAME="ml-container-creator/${PROJECT_NAME}/hf-token"
+     echo "🔐 Pre-flight: Ensuring Secrets Manager secret for HF token..."
+
+     # (stdout suppressed too: describe-secret prints a JSON blob when the secret exists)
+     if ! aws secretsmanager describe-secret --secret-id "$SECRET_NAME" --region "$AWS_REGION" > /dev/null 2>&1; then
+         echo "   Creating Secrets Manager secret: ${SECRET_NAME}"
+         aws secretsmanager create-secret \
+             --name "$SECRET_NAME" \
+             --secret-string "$HF_TOKEN" \
+             --region "$AWS_REGION" > /dev/null || {
+                 echo "❌ Failed to create Secrets Manager secret"
+                 exit 1
+             }
+     else
+         echo "   Updating Secrets Manager secret: ${SECRET_NAME}"
+         aws secretsmanager put-secret-value \
+             --secret-id "$SECRET_NAME" \
+             --secret-string "$HF_TOKEN" \
+             --region "$AWS_REGION" > /dev/null || {
+                 echo "❌ Failed to update Secrets Manager secret"
+                 exit 1
+             }
+     fi
+
+     SECRET_ARN=$(aws secretsmanager describe-secret \
+         --secret-id "$SECRET_NAME" \
+         --region "$AWS_REGION" \
+         --query 'ARN' \
+         --output text)
+     echo "✅ HF token stored in Secrets Manager: ${SECRET_ARN}"
+ else
+     echo "⚠️ No HF_TOKEN provided — tokenizer-based metrics (TTFT, ITL) may be unavailable"
+ fi
+
+ echo ""
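To double-check what the benchmark service will read, the stored secret can be fetched back with get-secret-value (the caller needs secretsmanager:GetSecretValue). A quick sanity check that prints only a prefix of the token:

    aws secretsmanager get-secret-value \
        --secret-id "ml-container-creator/${PROJECT_NAME}/hf-token" \
        --region "${AWS_REGION}" \
        --query 'SecretString' --output text | cut -c1-8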
+
+ # ── Step 1: Create AI Workload Config ─────────────────────────────────────────
+ # Build the inline workload spec JSON from do/config variables.
+ # The workload spec defines benchmark type, parameters, tooling, and secrets.
+ echo "⚙️ Step 1: Creating AI Workload Config: ${WORKLOAD_CONFIG_NAME}"
+
+ # Build parameters block
+ PARAMS_JSON="{\"prompt_input_tokens_mean\":${BENCHMARK_INPUT_TOKENS_MEAN},\"output_tokens_mean\":${BENCHMARK_OUTPUT_TOKENS_MEAN},\"concurrency\":${BENCHMARK_CONCURRENCY},\"streaming\":${BENCHMARK_STREAMING},\"tokenizer\":\"${MODEL_NAME}\""
+
+ # Add optional request_count if specified
+ if [ -n "${BENCHMARK_REQUEST_COUNT:-}" ]; then
+     PARAMS_JSON="${PARAMS_JSON},\"request_count\":${BENCHMARK_REQUEST_COUNT}"
+ fi
+
+ PARAMS_JSON="${PARAMS_JSON}}"
+
+ # Build secrets block (only if HF token is available)
+ SECRETS_JSON=""
+ if [ -n "${SECRET_ARN}" ]; then
+     SECRETS_JSON=",\"secrets\":{\"hf_token\":\"${SECRET_ARN}\"}"
+ fi
+
+ # Assemble full workload spec (inline YAML/JSON string for the WorkloadSpec.Inline field)
+ WORKLOAD_SPEC="{\"benchmark\":{\"type\":\"aiperf\"},\"parameters\":${PARAMS_JSON},\"tooling\":{\"api_standard\":\"openai\"}${SECRETS_JSON}}"
+
+ # Wrap in the API's expected structure: --ai-workload-configs '{"WorkloadSpec":{"Inline":"..."}}'
+ # The Inline field takes the spec as a JSON-encoded string
+ WORKLOAD_CONFIGS="{\"WorkloadSpec\":{\"Inline\":$(echo "${WORKLOAD_SPEC}" | python3 -c 'import sys,json; print(json.dumps(sys.stdin.read().strip()))')}}"
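Pretty-printed, the inline spec assembled above has this shape (values substituted from the illustrative config earlier; the secrets block appears only when a secret ARN was resolved):

    {
      "benchmark": { "type": "aiperf" },
      "parameters": {
        "prompt_input_tokens_mean": 512,
        "output_tokens_mean": 256,
        "concurrency": 8,
        "streaming": true,
        "tokenizer": "meta-llama/Llama-3.1-8B-Instruct"
      },
      "tooling": { "api_standard": "openai" },
      "secrets": { "hf_token": "arn:aws:secretsmanager:us-west-2:111122223333:secret:..." }
    }

The trailing python3 one-liner JSON-encodes this whole object into a string, because the WorkloadSpec.Inline field carries the spec as an escaped string rather than nested JSON.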
+
+ # Workload config idempotency: reuse if params match, recreate if they differ
+ CREATE_WORKLOAD_CONFIG=false  # default: reuse; flipped to true when creation is needed
+ EXISTING_CONFIG_SPEC=""
+ if aws sagemaker describe-ai-workload-config \
+     --ai-workload-config-name "${WORKLOAD_CONFIG_NAME}" \
+     --region "${AWS_REGION}" 2>/dev/null >/dev/null; then
+     EXISTING_CONFIG_SPEC=$(aws sagemaker describe-ai-workload-config \
+         --ai-workload-config-name "${WORKLOAD_CONFIG_NAME}" \
+         --region "${AWS_REGION}" \
+         --query 'AIWorkloadConfigs.WorkloadSpec.Inline' \
+         --output text 2>/dev/null) || EXISTING_CONFIG_SPEC=""
+ fi
+
+ if [ -n "${EXISTING_CONFIG_SPEC}" ]; then
+     # Compare existing spec with desired spec (normalize for comparison)
+     EXISTING_NORMALIZED=$(echo "${EXISTING_CONFIG_SPEC}" | python3 -c "import sys,json; print(json.dumps(json.loads(sys.stdin.read()), sort_keys=True))" 2>/dev/null) || EXISTING_NORMALIZED=""
+     DESIRED_NORMALIZED=$(echo "${WORKLOAD_SPEC}" | python3 -c "import sys,json; print(json.dumps(json.loads(sys.stdin.read()), sort_keys=True))" 2>/dev/null) || DESIRED_NORMALIZED=""
+
+     if [ "${EXISTING_NORMALIZED}" = "${DESIRED_NORMALIZED}" ]; then
+         echo "   ✅ Existing workload config matches current parameters — reusing"
+     else
+         echo "   ⚠️ Workload config parameters changed — recreating..."
+         aws sagemaker delete-ai-workload-config \
+             --ai-workload-config-name "${WORKLOAD_CONFIG_NAME}" \
+             --region "${AWS_REGION}" || true
+         CREATE_WORKLOAD_CONFIG=true
+     fi
+ else
+     CREATE_WORKLOAD_CONFIG=true
+ fi
+
+ if [ "${CREATE_WORKLOAD_CONFIG}" = "true" ]; then
+     # Create the workload config
+     if ! aws sagemaker create-ai-workload-config \
+         --ai-workload-config-name "${WORKLOAD_CONFIG_NAME}" \
+         --ai-workload-configs "${WORKLOAD_CONFIGS}" \
+         --region "${AWS_REGION}"; then
+         echo "❌ Failed to create AI Workload Config"
+         echo "   This may indicate the SageMaker AI Benchmarking API is not available in region: ${AWS_REGION}"
+         echo "   Check: https://docs.aws.amazon.com/sagemaker/latest/dg/regions-quotas.html"
+         exit 1
+     fi
+     echo "✅ Workload config created: ${WORKLOAD_CONFIG_NAME}"
+ fi
+
+ # Persist workload config name for resume
+ _update_benchmark_var "BENCHMARK_WORKLOAD_CONFIG_NAME" "${WORKLOAD_CONFIG_NAME}"
+ echo ""
+
+ # ── Step 2: Create AI Benchmark Job ──────────────────────────────────────────
+ # Target the deployed endpoint and inference component with the workload config.
+ echo "🚀 Step 2: Creating AI Benchmark Job: ${BENCHMARK_JOB_NAME}"
+
+ BENCHMARK_TARGET="{\"Endpoint\":{\"Identifier\":\"${ENDPOINT_NAME}\",\"InferenceComponents\":[{\"Identifier\":\"${IC_NAME}\"}]}}"
+ OUTPUT_CONFIG="{\"S3OutputLocation\":\"${BENCHMARK_S3_OUTPUT_PATH}\"}"
+
+ if ! aws sagemaker create-ai-benchmark-job \
+     --ai-benchmark-job-name "${BENCHMARK_JOB_NAME}" \
+     --benchmark-target "${BENCHMARK_TARGET}" \
+     --output-config "${OUTPUT_CONFIG}" \
+     --ai-workload-config-identifier "${WORKLOAD_CONFIG_NAME}" \
+     --role-arn "${ROLE_ARN}" \
+     --region "${AWS_REGION}"; then
+     echo "❌ Failed to create AI Benchmark Job"
+     echo "   Check that:"
+     echo "   • The execution role has sagemaker:CreateAIBenchmarkJob permission"
+     echo "   • The endpoint and inference component are valid"
+     echo "   • The S3 output path is accessible: ${BENCHMARK_S3_OUTPUT_PATH}"
+     exit 1
+ fi
+
+ echo "✅ Benchmark job created: ${BENCHMARK_JOB_NAME}"
+
+ # Save job name to do/config for idempotency on re-run
+ _update_benchmark_var "BENCHMARK_JOB_NAME" "${BENCHMARK_JOB_NAME}"
+
+ echo ""
+
+ fi # end of RESUME_EXISTING=false block
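Expanded, the two JSON arguments built for create-ai-benchmark-job look like this (endpoint and IC names are the illustrative ones from above):

    {
      "Endpoint": {
        "Identifier": "my-llm-project-endpoint",
        "InferenceComponents": [ { "Identifier": "my-llm-project-ic-default" } ]
      }
    }

    { "S3OutputLocation": "s3://my-llm-project-benchmarks/results" }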
+
+ # ── Step 3: Poll for completion ───────────────────────────────────────────────
+ # Poll describe-ai-benchmark-job every POLL_INTERVAL seconds until terminal state.
+ # Terminal states: Completed, Failed, Stopped
+
+ # Skip polling if we already know the job completed (resumed a finished job)
+ if [ "${JOB_STATUS:-}" != "Completed" ] && [ "${JOB_STATUS:-}" != "Failed" ] && [ "${JOB_STATUS:-}" != "Stopped" ]; then
+
+ echo "⏳ Step 3: Waiting for benchmark to complete..."
+ echo "   Polling every ${POLL_INTERVAL}s (max ${MAX_POLL_ATTEMPTS} attempts = 30 min)"
+ echo ""
+
+ POLL_COUNT=0
+ JOB_STATUS=""
+
+ while [ ${POLL_COUNT} -lt ${MAX_POLL_ATTEMPTS} ]; do
+     JOB_STATUS=$(aws sagemaker describe-ai-benchmark-job \
+         --ai-benchmark-job-name "${BENCHMARK_JOB_NAME}" \
+         --region "${AWS_REGION}" \
+         --query 'AIBenchmarkJobStatus' \
+         --output text 2>/dev/null) || {
+             echo "⚠️ Failed to describe benchmark job (credentials may have expired)"
+             echo "   Re-run to check status manually:"
+             echo "   aws sagemaker describe-ai-benchmark-job --ai-benchmark-job-name ${BENCHMARK_JOB_NAME} --region ${AWS_REGION}"
+             exit 1
+         }
+
+     case "${JOB_STATUS}" in
+         Completed)
+             echo "✅ Benchmark completed successfully!"
+             break
+             ;;
+         Failed)
+             echo "❌ Benchmark job failed"
+             break
+             ;;
+         Stopped)
+             echo "⚠️ Benchmark job was stopped"
+             break
+             ;;
+         *)
+             POLL_COUNT=$((POLL_COUNT + 1))
+             ELAPSED=$((POLL_COUNT * POLL_INTERVAL))
+             echo "   $(date +%H:%M:%S)  Status: ${JOB_STATUS} (${ELAPSED}s elapsed)"
+             sleep ${POLL_INTERVAL}
+             ;;
+     esac
+ done
+
+ # Check for timeout
+ if [ ${POLL_COUNT} -ge ${MAX_POLL_ATTEMPTS} ]; then
+     echo ""
+     echo "⚠️ Benchmark timed out after 30 minutes (status: ${JOB_STATUS})"
+     echo "   The job may still be running. Re-run ./do/benchmark to resume waiting."
+     echo "   Or check status manually:"
+     echo "   aws sagemaker describe-ai-benchmark-job --ai-benchmark-job-name ${BENCHMARK_JOB_NAME} --region ${AWS_REGION}"
+     exit 1
+ fi
+
+ fi # end of polling conditional
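The inline loop above is the same poll-until-terminal pattern the package factors into package/templates/do/lib/wait.sh for endpoints and ICs. A generic helper in that spirit (a sketch, not code from that file) could look like:

    # wait_for_status <interval> <max-attempts> <command...>
    # Runs <command...> each round and echoes its output; returns 0 on a
    # terminal state, 1 if the command fails, 2 on timeout.
    wait_for_status() {
        local interval="$1" max="$2"; shift 2
        local i=0 status=""
        while [ "${i}" -lt "${max}" ]; do
            status="$("$@")" || return 1
            case "${status}" in
                Completed|Failed|Stopped) echo "${status}"; return 0 ;;
            esac
            i=$((i + 1))
            sleep "${interval}"
        done
        echo "${status}"
        return 2
    }

Called with the same describe command the loop uses, for example: wait_for_status 30 60 aws sagemaker describe-ai-benchmark-job --ai-benchmark-job-name "${BENCHMARK_JOB_NAME}" --region "${AWS_REGION}" --query 'AIBenchmarkJobStatus' --output text.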
+
+ echo ""
+
+ # ── Step 4: Display results ───────────────────────────────────────────────────
+ if [ "${JOB_STATUS}" = "Completed" ]; then
+     # Persist results locally to benchmarks/<job-name>/
+     PROJECT_ROOT="${SCRIPT_DIR}/.."
+     LOCAL_RESULTS_DIR="${PROJECT_ROOT}/benchmarks/${BENCHMARK_JOB_NAME}"
+     RESULTS_FILE="${LOCAL_RESULTS_DIR}/results.json"
+
+     # Check if results already exist locally (idempotency: skip S3 download)
+     if [ -f "${RESULTS_FILE}" ]; then
+         echo "📥 Step 4: Results already available locally"
+         RESULTS_DOWNLOADED=true
+     else
+         echo "📥 Step 4: Downloading benchmark results..."
+
+         RESULTS_S3_PATH=$(aws sagemaker describe-ai-benchmark-job \
+             --ai-benchmark-job-name "${BENCHMARK_JOB_NAME}" \
+             --region "${AWS_REGION}" \
+             --query 'OutputConfig.S3OutputLocation' \
+             --output text 2>/dev/null)
+
+         # Create local benchmarks directory
+         mkdir -p "${LOCAL_RESULTS_DIR}"
+
+         # The benchmark service writes results into a subdirectory (e.g., bmk-prod-<job>-<hash>/)
+         # under the S3OutputLocation. We use multiple strategies to locate the results file.
+         RESULTS_DOWNLOADED=false
+
+         # Ensure RESULTS_S3_PATH has a trailing slash for consistent path joining
+         RESULTS_S3_PATH="${RESULTS_S3_PATH%/}/"
+
+         # Strategy 1: Sync the entire output tree locally, then find results.
+         # This is the most reliable approach — handles any subdirectory structure.
+         echo "   Syncing results from S3..."
+         if aws s3 sync "${RESULTS_S3_PATH}" "${LOCAL_RESULTS_DIR}/" --region "${AWS_REGION}" 2>/dev/null; then
+             # Look for any JSON file in the synced directory tree
+             FOUND_FILE=$(find "${LOCAL_RESULTS_DIR}" -name "*.json" -type f 2>/dev/null | head -1)
+             if [ -n "${FOUND_FILE}" ]; then
+                 # If the found file isn't already at our canonical path, copy it there
+                 if [ "${FOUND_FILE}" != "${RESULTS_FILE}" ]; then
+                     cp "${FOUND_FILE}" "${RESULTS_FILE}"
+                 fi
+                 RESULTS_DOWNLOADED=true
+             fi
+         fi
+
+         # Strategy 2: If sync found nothing, try listing and downloading individual files.
+         # This handles cases where s3 sync silently fails (permissions, empty prefix match).
+         if [ "${RESULTS_DOWNLOADED}" = false ]; then
+             echo "   Searching for results files..."
+             RESULTS_BUCKET=$(echo "${RESULTS_S3_PATH}" | sed 's|s3://||' | cut -d'/' -f1)
+             RESULTS_PREFIX=$(echo "${RESULTS_S3_PATH}" | sed "s|s3://${RESULTS_BUCKET}/||")
+
+             # List all objects under the output path and find data files.
+             # aws s3api list-objects-v2 is more reliable than aws s3 ls --recursive.
+             # (|| FOUND_KEY="" keeps set -e/pipefail from aborting when grep matches nothing)
+             FOUND_KEY=$(aws s3api list-objects-v2 \
+                 --bucket "${RESULTS_BUCKET}" \
+                 --prefix "${RESULTS_PREFIX}" \
+                 --region "${AWS_REGION}" \
+                 --query 'Contents[].Key' \
+                 --output text 2>/dev/null \
+                 | tr '\t' '\n' \
+                 | grep -E '\.(json|jsonl|csv)$' \
+                 | head -1) || FOUND_KEY=""
+
+             if [ -n "${FOUND_KEY}" ] && [ "${FOUND_KEY}" != "None" ]; then
+                 if aws s3 cp "s3://${RESULTS_BUCKET}/${FOUND_KEY}" "${RESULTS_FILE}" --region "${AWS_REGION}" 2>/dev/null; then
+                     RESULTS_DOWNLOADED=true
+                 fi
+             fi
+         fi
+
+         # Strategy 3: If still nothing, try direct path patterns the service might use
+         if [ "${RESULTS_DOWNLOADED}" = false ]; then
+             for PATTERN in "results.json" "benchmark_results.json" "output.json"; do
+                 if aws s3 cp "${RESULTS_S3_PATH}${PATTERN}" "${RESULTS_FILE}" --region "${AWS_REGION}" 2>/dev/null; then
+                     RESULTS_DOWNLOADED=true
+                     break
+                 fi
+             done
+         fi
+     fi
+
+     if [ "${RESULTS_DOWNLOADED}" = true ]; then
+         echo "✅ Results downloaded"
+         echo ""
+
+         # Display summary table
+         echo "╔══════════════════════════════════════════════════════════════════╗"
+         echo "║                 SageMaker AI Benchmark Results                     ║"
+         echo "╠══════════════════════════════════════════════════════════════════╣"
+         echo "║ Job:      ${BENCHMARK_JOB_NAME}"
+         echo "║ Endpoint: ${ENDPOINT_NAME}"
+         echo "╠══════════════════════════════════════════════════════════════════╣"
+
+         # Parse and display metrics using built-in tools:
+         # extract key metrics from the results JSON
+         if command -v python3 &>/dev/null; then
+             python3 -c "
+ import json, sys
+
+ try:
+     with open('${RESULTS_FILE}') as f:
+         data = json.load(f)
+
+     metrics = data if isinstance(data, dict) else {}
+
+     # Helper to safely get nested values
+     def get_metric(d, *keys):
+         for k in keys:
+             if isinstance(d, dict):
+                 d = d.get(k, 'N/A')
+             else:
+                 return 'N/A'
+         return d
+
+     # Display throughput
+     throughput = get_metric(metrics, 'request_throughput')
+     output_throughput = get_metric(metrics, 'output_token_throughput')
+     print(f'║ Request Throughput:      {throughput} req/s')
+     print(f'║ Output Token Throughput: {output_throughput} tokens/s')
+     print('║')
+
+     # Display request latency
+     lat_p50 = get_metric(metrics, 'request_latency', 'p50')
+     lat_p90 = get_metric(metrics, 'request_latency', 'p90')
+     lat_p99 = get_metric(metrics, 'request_latency', 'p99')
+     print('║ Request Latency (ms):')
+     print(f'║   P50: {lat_p50}  P90: {lat_p90}  P99: {lat_p99}')
+     print('║')
+
+     # Display TTFT (time to first token)
+     ttft_p50 = get_metric(metrics, 'time_to_first_token', 'p50')
+     ttft_p90 = get_metric(metrics, 'time_to_first_token', 'p90')
+     ttft_p99 = get_metric(metrics, 'time_to_first_token', 'p99')
+     print('║ Time to First Token (ms):')
+     print(f'║   P50: {ttft_p50}  P90: {ttft_p90}  P99: {ttft_p99}')
+     print('║')
+
+     # Display ITL (inter-token latency)
+     itl_p50 = get_metric(metrics, 'inter_token_latency', 'p50')
+     itl_p90 = get_metric(metrics, 'inter_token_latency', 'p90')
+     itl_p99 = get_metric(metrics, 'inter_token_latency', 'p99')
+     print('║ Inter-Token Latency (ms):')
+     print(f'║   P50: {itl_p50}  P90: {itl_p90}  P99: {itl_p99}')
+
+ except Exception as e:
+     print(f'║ ⚠️ Could not parse results: {e}')
+     print('║ Raw file: ${RESULTS_FILE}')
+ "
+         else
+             # Fallback: display raw JSON if python3 is not available
+             echo "║ (python3 not available — showing raw results)"
+             echo "║"
+             head -50 "${RESULTS_FILE}"
+         fi
637
+
638
+ echo "╚══════════════════════════════════════════════════════════════════╝"
639
+ echo ""
640
+ echo "📁 Results saved to: benchmarks/${BENCHMARK_JOB_NAME}/"
641
+ echo "☁️ S3 results: ${RESULTS_S3_PATH:-${BENCHMARK_S3_OUTPUT_PATH}}"
642
+ else
643
+ echo "⚠️ Could not download results from S3"
644
+ echo " The benchmark completed but results could not be located."
645
+ echo ""
646
+ echo " Debug — list objects at the output path:"
647
+ echo " aws s3 ls ${RESULTS_S3_PATH} --recursive --region ${AWS_REGION}"
648
+ echo ""
649
+ echo " Or list via API:"
650
+ RESULTS_BUCKET=$(echo "${RESULTS_S3_PATH}" | sed 's|s3://||' | cut -d'/' -f1)
651
+ RESULTS_PREFIX=$(echo "${RESULTS_S3_PATH}" | sed "s|s3://${RESULTS_BUCKET}/||")
652
+ echo " aws s3api list-objects-v2 --bucket ${RESULTS_BUCKET} --prefix ${RESULTS_PREFIX} --region ${AWS_REGION}"
653
+ echo ""
654
+ # Show what's actually there to help debug
655
+ echo " Objects found at output path:"
656
+ aws s3api list-objects-v2 \
657
+ --bucket "${RESULTS_BUCKET}" \
658
+ --prefix "${RESULTS_PREFIX}" \
659
+ --region "${AWS_REGION}" \
660
+ --query 'Contents[].{Key: Key, Size: Size}' \
661
+ --output table 2>/dev/null || echo " (could not list objects)"
662
+ fi
663
+
664
+ elif [ "${JOB_STATUS}" = "Failed" ]; then
665
+ # Display failure reason
666
+ echo "❌ Step 4: Benchmark job failed"
667
+ FAILURE_REASON=$(aws sagemaker describe-ai-benchmark-job \
668
+ --ai-benchmark-job-name "${BENCHMARK_JOB_NAME}" \
669
+ --region "${AWS_REGION}" \
670
+ --query 'FailureReason' \
671
+ --output text 2>/dev/null)
672
+ echo " Reason: ${FAILURE_REASON}"
673
+ echo ""
674
+ echo " Debug:"
675
+ echo " aws sagemaker describe-ai-benchmark-job --ai-benchmark-job-name ${BENCHMARK_JOB_NAME} --region ${AWS_REGION}"
676
+
677
+ elif [ "${JOB_STATUS}" = "Stopped" ]; then
678
+ echo "⚠️ Step 4: Benchmark job was stopped before completion"
679
+ echo " No results available."
680
+ fi
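The Completed branch's parser assumes a flat results document keyed by metric name, with the latency metrics broken out per percentile. A hedged example of a document it can digest (field names come from the parser; the flat layout and all numbers are assumptions, since the actual AIPerf output format isn't shown in this package):

    {
      "request_throughput": 12.4,
      "output_token_throughput": 3170.5,
      "request_latency":     { "p50": 640, "p90": 910, "p99": 1180 },
      "time_to_first_token": { "p50": 85,  "p90": 140, "p99": 220 },
      "inter_token_latency": { "p50": 18,  "p90": 24,  "p99": 31 }
    }

Anything missing simply renders as N/A, and a completely different layout falls through to the except branch rather than aborting the script.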
681
+
682
+ # ── Optional cleanup (--clean flag) ───────────────────────────────────────────
683
+ # Delete workload config and benchmark job to avoid resource accumulation.
684
+ if [ "${CLEAN_AFTER}" = true ]; then
685
+ echo ""
686
+ echo "🧹 Cleaning up benchmark resources (--clean)..."
687
+
688
+ # Delete workload config
689
+ if aws sagemaker delete-ai-workload-config \
690
+ --ai-workload-config-name "${WORKLOAD_CONFIG_NAME}" \
691
+ --region "${AWS_REGION}" 2>/dev/null; then
692
+ echo " ✓ Deleted workload config: ${WORKLOAD_CONFIG_NAME}"
693
+ else
694
+ echo " ⚠️ Could not delete workload config: ${WORKLOAD_CONFIG_NAME}"
695
+ fi
696
+
697
+ # Delete benchmark job (must be in terminal state)
698
+ if aws sagemaker delete-ai-benchmark-job \
699
+ --ai-benchmark-job-name "${BENCHMARK_JOB_NAME}" \
700
+ --region "${AWS_REGION}" 2>/dev/null; then
701
+ echo " ✓ Deleted benchmark job: ${BENCHMARK_JOB_NAME}"
702
+ else
703
+ echo " ⚠️ Could not delete benchmark job: ${BENCHMARK_JOB_NAME}"
704
+ fi
705
+
706
+ echo "✅ Cleanup complete"
707
+ fi
708
+
709
+ echo ""
710
+ echo "📋 Summary:"
711
+ echo " Workload Config: ${WORKLOAD_CONFIG_NAME}"
712
+ echo " Benchmark Job: ${BENCHMARK_JOB_NAME}"
713
+ echo " Status: ${JOB_STATUS}"
714
+ echo ""
715
+ if [ "${CLEAN_AFTER}" = false ]; then
716
+ echo "🧹 To clean up benchmark resources:"
717
+ echo " ./do/clean benchmark"
718
+ fi
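Typical invocations, using the flags documented in --help (IC and adapter names are the illustrative ones from above):

    ./do/benchmark                       # first deployed IC, default workload
    ./do/benchmark --ic default          # a specific inference component
    ./do/benchmark --adapter summarize   # a LoRA adapter IC
    ./do/benchmark --force --clean       # fresh job, delete resources afterwards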