@aws/ml-container-creator 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -262,6 +262,39 @@ Clean everything:
262
262
 
263
263
  ---
264
264
 
265
+ <% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
266
+ ### `./do/benchmark`
267
+
268
+ Run SageMaker AI Benchmark against deployed endpoint.
269
+
270
+ **What it does:**
271
+ - Verifies endpoint is InService
272
+ - Ensures S3 output bucket exists
273
+ - Creates AI workload configuration
274
+ - Creates and monitors AI benchmark job
275
+ - Displays performance results (throughput, latency P50/P90/P99, TTFT, ITL)
276
+
277
+ **Prerequisites:**
278
+ - Endpoint deployed and InService (`./do/deploy`)
279
+ - AWS credentials configured
280
+
281
+ **Usage:**
282
+ ```bash
283
+ ./do/benchmark
284
+ ```
285
+
286
+ **Clean up benchmark resources:**
287
+ ```bash
288
+ ./do/benchmark --clean
289
+ ```
290
+
291
+ **Output:**
292
+ - Benchmark results summary table
293
+ - Detailed results in S3
294
+
295
+ ---
296
+
297
+ <% } %>
265
298
  <% if (buildTarget === 'codebuild') { %>
266
299
  ### `./do/submit`
267
300
 
@@ -0,0 +1,646 @@
1
+ #!/bin/bash
2
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ # do/benchmark — Run SageMaker AI Benchmark against deployed endpoint
6
+ # Uses NVIDIA AIPerf via the SageMaker AI Benchmarking service to measure
7
+ # LLM endpoint performance: throughput, latency, TTFT, and ITL.
8
+
9
+ set -e
10
+ set -u
11
+ set -o pipefail
12
+
13
+ # ── Source project configuration ──────────────────────────────────────────────
14
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
15
+ source "${SCRIPT_DIR}/config"
16
+
17
+ # ── Parse flags ───────────────────────────────────────────────────────────────
18
+ CLEAN_AFTER=false
19
+ FORCE=false
20
+ for arg in "$@"; do
21
+ case "$arg" in
22
+ --clean) CLEAN_AFTER=true ;;
23
+ --force) FORCE=true ;;
24
+ --help|-h)
25
+ echo "Usage: ./do/benchmark [--force] [--clean]"
26
+ echo ""
27
+ echo "Run SageMaker AI Benchmark against the deployed endpoint."
28
+ echo ""
29
+ echo "Options:"
30
+ echo " --force Create a new benchmark job even if one is already running"
31
+ echo " --clean Delete workload config and benchmark job after displaying results"
32
+ echo ""
33
+ echo "Idempotency:"
34
+ echo " If a benchmark job is already in progress, re-running without --force"
35
+ echo " will resume waiting for the existing job and display its results."
36
+ echo ""
37
+ echo "Prerequisites:"
38
+ echo " • Endpoint must be deployed and InService (run ./do/deploy first)"
39
+ echo " • AWS credentials must be configured"
40
+ exit 0
41
+ ;;
42
+ esac
43
+ done
44
+
45
+ # ── Verify AWS CLI v2 ─────────────────────────────────────────────────────────
46
+ if ! aws --version 2>&1 | grep -q "aws-cli/2"; then
47
+ echo "❌ AWS CLI v2 is required for benchmarking."
48
+ echo " The SageMaker AI Benchmarking API is only available in CLI v2."
49
+ echo " Detected: $(aws --version 2>&1 | head -1)"
50
+ echo ""
51
+ echo " Install CLI v2: https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html"
52
+ exit 1
53
+ fi
54
+
55
+ # ── Helper: update a variable in do/config ────────────────────────────────────
56
+ _update_benchmark_var() {
57
+ local var_name="$1"
58
+ local var_value="$2"
59
+ local config_file="${SCRIPT_DIR}/config"
60
+
61
+ if grep -q "^export ${var_name}=" "${config_file}" 2>/dev/null; then
62
+ sed -i.bak "s|^export ${var_name}=.*|export ${var_name}=\"${var_value}\"|" "${config_file}"
63
+ rm -f "${config_file}.bak"
64
+ else
65
+ echo "export ${var_name}=\"${var_value}\"" >> "${config_file}"
66
+ fi
67
+ }
68
+
69
+ # ── Idempotency: Check for existing benchmark job ─────────────────────────────
70
+ # If BENCHMARK_JOB_NAME is set in do/config and the job is still running,
71
+ # resume waiting for it instead of creating a new one (unless --force is used).
72
+ RESUME_EXISTING=false
73
+
74
+ if [ "${FORCE}" = false ] && [ -n "${BENCHMARK_JOB_NAME:-}" ]; then
75
+ EXISTING_STATUS=$(aws sagemaker describe-ai-benchmark-job \
76
+ --ai-benchmark-job-name "${BENCHMARK_JOB_NAME}" \
77
+ --region "${AWS_REGION}" \
78
+ --query 'AIBenchmarkJobStatus' \
79
+ --output text 2>/dev/null) || EXISTING_STATUS=""
80
+
81
+ case "${EXISTING_STATUS}" in
82
+ InProgress|Starting|Pending)
83
+ echo "📊 Resuming existing benchmark job: ${BENCHMARK_JOB_NAME}"
84
+ echo " Status: ${EXISTING_STATUS}"
85
+ echo " (use --force to start a new benchmark instead)"
86
+ echo ""
87
+ RESUME_EXISTING=true
88
+ ;;
89
+ Completed)
90
+ echo "📊 Previous benchmark job already completed: ${BENCHMARK_JOB_NAME}"
91
+ echo " (use --force to start a new benchmark)"
92
+ echo ""
93
+ RESUME_EXISTING=true
94
+ JOB_STATUS="Completed"
95
+ ;;
96
+ Failed|Stopped)
97
+ FAILURE_REASON=$(aws sagemaker describe-ai-benchmark-job \
98
+ --ai-benchmark-job-name "${BENCHMARK_JOB_NAME}" \
99
+ --region "${AWS_REGION}" \
100
+ --query 'FailureReason' \
101
+ --output text 2>/dev/null) || FAILURE_REASON="unknown"
102
+ echo "⚠️ Previous benchmark job ${EXISTING_STATUS}: ${BENCHMARK_JOB_NAME}"
103
+ if [ "${EXISTING_STATUS}" = "Failed" ] && [ -n "${FAILURE_REASON}" ] && [ "${FAILURE_REASON}" != "None" ]; then
104
+ echo " Reason: ${FAILURE_REASON}"
105
+ fi
106
+ echo " Use --force to start a new benchmark."
107
+ exit 1
108
+ ;;
109
+ *)
110
+ # Job doesn't exist or can't be described — proceed with new job
111
+ ;;
112
+ esac
113
+ fi
114
+
115
+ # ── Configuration ─────────────────────────────────────────────────────────────
116
+ WORKLOAD_CONFIG_NAME="${PROJECT_NAME}-benchmark-config"
117
+ if [ "${RESUME_EXISTING}" = false ]; then
118
+ BENCHMARK_JOB_NAME="${PROJECT_NAME}-benchmark-$(date +%Y%m%d-%H%M%S)"
119
+ fi
120
+ POLL_INTERVAL=30
121
+ MAX_POLL_ATTEMPTS=60 # 30 minutes max (60 * 30s)
122
+
123
+ echo "📊 SageMaker AI Benchmark"
124
+ echo " Project: ${PROJECT_NAME}"
125
+ echo " Endpoint: ${ENDPOINT_NAME:-not set}"
126
+ echo " Inference Component: ${INFERENCE_COMPONENT_NAME:-not set}"
127
+ echo " Concurrency: ${BENCHMARK_CONCURRENCY}"
128
+ echo " Input tokens (mean): ${BENCHMARK_INPUT_TOKENS_MEAN}"
129
+ echo " Output tokens (mean): ${BENCHMARK_OUTPUT_TOKENS_MEAN}"
130
+ echo " Streaming: ${BENCHMARK_STREAMING}"
131
+ if [ -n "${BENCHMARK_REQUEST_COUNT:-}" ]; then
132
+ echo " Request count: ${BENCHMARK_REQUEST_COUNT}"
133
+ fi
134
+ echo " S3 output: ${BENCHMARK_S3_OUTPUT_PATH}"
135
+ echo ""
136
+
137
+ # ── Pre-flight check: Verify endpoint is InService ────────────────────────────
138
+ if [ "${RESUME_EXISTING}" = false ]; then
139
+
140
+ echo "🔍 Pre-flight: Verifying endpoint status..."
141
+
142
+ if [ -z "${ENDPOINT_NAME:-}" ]; then
143
+ echo "❌ ENDPOINT_NAME is not set in do/config"
144
+ echo " Deploy your endpoint first: ./do/deploy"
145
+ exit 1
146
+ fi
147
+
148
+ ENDPOINT_STATUS=$(aws sagemaker describe-endpoint \
149
+ --endpoint-name "${ENDPOINT_NAME}" \
150
+ --region "${AWS_REGION}" \
151
+ --query 'EndpointStatus' \
152
+ --output text 2>/dev/null) || {
153
+ echo "❌ Failed to describe endpoint: ${ENDPOINT_NAME}"
154
+ echo " Check that the endpoint exists and your AWS credentials are valid."
155
+ exit 1
156
+ }
157
+
158
+ if [ "${ENDPOINT_STATUS}" != "InService" ]; then
159
+ echo "❌ Endpoint is not InService (current status: ${ENDPOINT_STATUS})"
160
+ echo " The endpoint must be InService before running a benchmark."
161
+ echo " Check status: aws sagemaker describe-endpoint --endpoint-name ${ENDPOINT_NAME} --region ${AWS_REGION}"
162
+ exit 1
163
+ fi
164
+
165
+ echo "✅ Endpoint is InService: ${ENDPOINT_NAME}"
166
+
167
+ # ── Pre-flight check: Ensure S3 output bucket exists ──────────────────────────
168
+ echo "🔍 Pre-flight: Checking S3 output bucket..."
169
+
170
+ BENCHMARK_S3_BUCKET=$(echo "${BENCHMARK_S3_OUTPUT_PATH}" | sed 's|s3://||' | cut -d'/' -f1)
171
+
172
+ if ! aws s3api head-bucket --bucket "${BENCHMARK_S3_BUCKET}" --region "${AWS_REGION}" 2>/dev/null; then
173
+ echo "📦 Creating S3 bucket: ${BENCHMARK_S3_BUCKET}"
174
+ if [ "${AWS_REGION}" = "us-east-1" ]; then
175
+ if ! aws s3api create-bucket \
176
+ --bucket "${BENCHMARK_S3_BUCKET}" \
177
+ --region "${AWS_REGION}"; then
178
+ echo "❌ Failed to create S3 bucket: ${BENCHMARK_S3_BUCKET}"
179
+ exit 1
180
+ fi
181
+ else
182
+ if ! aws s3api create-bucket \
183
+ --bucket "${BENCHMARK_S3_BUCKET}" \
184
+ --region "${AWS_REGION}" \
185
+ --create-bucket-configuration LocationConstraint="${AWS_REGION}"; then
186
+ echo "❌ Failed to create S3 bucket: ${BENCHMARK_S3_BUCKET}"
187
+ exit 1
188
+ fi
189
+ fi
190
+ echo "✅ S3 bucket created: ${BENCHMARK_S3_BUCKET}"
191
+ else
192
+ echo "✅ S3 bucket exists: ${BENCHMARK_S3_BUCKET}"
193
+ fi
194
+
195
+ # ── Pre-flight check: Ensure Secrets Manager secret for HF token ──────────────
196
+ # The benchmarking service requires a Secrets Manager ARN for tokenizer access.
197
+ # If HF_TOKEN is available (plaintext or resolved from ARN), store it in Secrets Manager.
198
+ SECRET_ARN=""
199
+
200
+ if [ -n "${HF_TOKEN_ARN:-}" ]; then
201
+ # Already using Secrets Manager ARN — use it directly
202
+ SECRET_ARN="${HF_TOKEN_ARN}"
203
+ echo "✅ Using existing Secrets Manager ARN for HF token: ${SECRET_ARN}"
204
+ elif [ -n "${HF_TOKEN:-}" ]; then
205
+ # Plaintext HF token provided — store in Secrets Manager for the benchmark service
206
+ SECRET_NAME="ml-container-creator/${PROJECT_NAME}/hf-token"
207
+ echo "🔐 Pre-flight: Ensuring Secrets Manager secret for HF token..."
208
+
209
+ if ! aws secretsmanager describe-secret --secret-id "$SECRET_NAME" --region "$AWS_REGION" >/dev/null 2>&1; then
210
+ echo " Creating Secrets Manager secret: ${SECRET_NAME}"
211
+ aws secretsmanager create-secret \
212
+ --name "$SECRET_NAME" \
213
+ --secret-string "$HF_TOKEN" \
214
+ --region "$AWS_REGION" > /dev/null || {
215
+ echo "❌ Failed to create Secrets Manager secret"
216
+ exit 1
217
+ }
218
+ else
219
+ echo " Updating Secrets Manager secret: ${SECRET_NAME}"
220
+ aws secretsmanager put-secret-value \
221
+ --secret-id "$SECRET_NAME" \
222
+ --secret-string "$HF_TOKEN" \
223
+ --region "$AWS_REGION" > /dev/null || {
224
+ echo "❌ Failed to update Secrets Manager secret"
225
+ exit 1
226
+ }
227
+ fi
228
+
229
+ SECRET_ARN=$(aws secretsmanager describe-secret \
230
+ --secret-id "$SECRET_NAME" \
231
+ --region "$AWS_REGION" \
232
+ --query 'ARN' \
233
+ --output text)
234
+ echo "✅ HF token stored in Secrets Manager: ${SECRET_ARN}"
235
+ else
236
+ echo "⚠️ No HF_TOKEN provided — tokenizer-based metrics (TTFT, ITL) may be unavailable"
237
+ fi
238
+
239
+ echo ""
240
+
241
+ # ── Step 1: Create AI Workload Config ─────────────────────────────────────────
242
+ # Build the inline workload spec JSON from do/config variables.
243
+ # The workload spec defines benchmark type, parameters, tooling, and secrets.
244
+ echo "⚙️ Step 1: Creating AI Workload Config: ${WORKLOAD_CONFIG_NAME}"
245
+
246
+ # Build parameters block
247
+ PARAMS_JSON="{\"prompt_input_tokens_mean\":${BENCHMARK_INPUT_TOKENS_MEAN},\"output_tokens_mean\":${BENCHMARK_OUTPUT_TOKENS_MEAN},\"concurrency\":${BENCHMARK_CONCURRENCY},\"streaming\":${BENCHMARK_STREAMING},\"tokenizer\":\"${MODEL_NAME}\""
248
+
249
+ # Add optional request_count if specified
250
+ if [ -n "${BENCHMARK_REQUEST_COUNT:-}" ]; then
251
+ PARAMS_JSON="${PARAMS_JSON},\"request_count\":${BENCHMARK_REQUEST_COUNT}"
252
+ fi
253
+
254
+ PARAMS_JSON="${PARAMS_JSON}}"
255
+
256
+ # Build secrets block (only if HF token is available)
257
+ SECRETS_JSON=""
258
+ if [ -n "${SECRET_ARN}" ]; then
259
+ SECRETS_JSON=",\"secrets\":{\"hf_token\":\"${SECRET_ARN}\"}"
260
+ fi
261
+
262
+ # Assemble full workload spec (inline YAML/JSON string for the WorkloadSpec.Inline field)
263
+ WORKLOAD_SPEC="{\"benchmark\":{\"type\":\"aiperf\"},\"parameters\":${PARAMS_JSON},\"tooling\":{\"api_standard\":\"openai\"}${SECRETS_JSON}}"
264
+
265
+ # Wrap in the API's expected structure: --ai-workload-configs '{"WorkloadSpec":{"Inline":"..."}}'
266
+ # The Inline field takes the spec as a JSON-encoded string
267
+ WORKLOAD_CONFIGS="{\"WorkloadSpec\":{\"Inline\":$(echo "${WORKLOAD_SPEC}" | python3 -c 'import sys,json; print(json.dumps(sys.stdin.read().strip()))')}}"
268
+
269
+ # Workload config idempotency: reuse if params match, recreate if they differ
270
+ EXISTING_CONFIG_SPEC=""
271
+ if aws sagemaker describe-ai-workload-config \
272
+ --ai-workload-config-name "${WORKLOAD_CONFIG_NAME}" \
273
+ --region "${AWS_REGION}" 2>/dev/null >/dev/null; then
274
+ EXISTING_CONFIG_SPEC=$(aws sagemaker describe-ai-workload-config \
275
+ --ai-workload-config-name "${WORKLOAD_CONFIG_NAME}" \
276
+ --region "${AWS_REGION}" \
277
+ --query 'AIWorkloadConfigs.WorkloadSpec.Inline' \
278
+ --output text 2>/dev/null) || EXISTING_CONFIG_SPEC=""
279
+ fi
280
+
281
+ if [ -n "${EXISTING_CONFIG_SPEC}" ]; then
282
+ # Compare existing spec with desired spec (normalize for comparison)
283
+ EXISTING_NORMALIZED=$(echo "${EXISTING_CONFIG_SPEC}" | python3 -c "import sys,json; print(json.dumps(json.loads(sys.stdin.read()), sort_keys=True))" 2>/dev/null) || EXISTING_NORMALIZED=""
284
+ DESIRED_NORMALIZED=$(echo "${WORKLOAD_SPEC}" | python3 -c "import sys,json; print(json.dumps(json.loads(sys.stdin.read()), sort_keys=True))" 2>/dev/null) || DESIRED_NORMALIZED=""
285
+
286
+ if [ "${EXISTING_NORMALIZED}" = "${DESIRED_NORMALIZED}" ]; then
287
+ echo " ✅ Existing workload config matches current parameters — reusing"
288
+ else
289
+ echo " ⚠️ Workload config parameters changed — recreating..."
290
+ aws sagemaker delete-ai-workload-config \
291
+ --ai-workload-config-name "${WORKLOAD_CONFIG_NAME}" \
292
+ --region "${AWS_REGION}" || true
293
+ CREATE_WORKLOAD_CONFIG=true
294
+ fi
295
+ else
296
+ CREATE_WORKLOAD_CONFIG=true
297
+ fi
298
+
299
+ if [ "${CREATE_WORKLOAD_CONFIG:-true}" = "true" ]; then
300
+ # Create the workload config
301
+ if ! aws sagemaker create-ai-workload-config \
302
+ --ai-workload-config-name "${WORKLOAD_CONFIG_NAME}" \
303
+ --ai-workload-configs "${WORKLOAD_CONFIGS}" \
304
+ --region "${AWS_REGION}"; then
305
+ echo "❌ Failed to create AI Workload Config"
306
+ echo " This may indicate the SageMaker AI Benchmarking API is not available in region: ${AWS_REGION}"
307
+ echo " Check: https://docs.aws.amazon.com/sagemaker/latest/dg/regions-quotas.html"
308
+ exit 1
309
+ fi
310
+ echo "✅ Workload config created: ${WORKLOAD_CONFIG_NAME}"
311
+ fi
312
+
313
+ # Persist workload config name for resume
314
+ _update_benchmark_var "BENCHMARK_WORKLOAD_CONFIG_NAME" "${WORKLOAD_CONFIG_NAME}"
315
+ echo ""
316
+
317
+ # ── Step 2: Create AI Benchmark Job ──────────────────────────────────────────
318
+ # Target the deployed endpoint and inference component with the workload config.
319
+ echo "🚀 Step 2: Creating AI Benchmark Job: ${BENCHMARK_JOB_NAME}"
320
+
321
+ BENCHMARK_TARGET="{\"Endpoint\":{\"Identifier\":\"${ENDPOINT_NAME}\",\"InferenceComponents\":[{\"Identifier\":\"${INFERENCE_COMPONENT_NAME}\"}]}}"
322
+ OUTPUT_CONFIG="{\"S3OutputLocation\":\"${BENCHMARK_S3_OUTPUT_PATH}\"}"
323
+
324
+ if ! aws sagemaker create-ai-benchmark-job \
325
+ --ai-benchmark-job-name "${BENCHMARK_JOB_NAME}" \
326
+ --benchmark-target "${BENCHMARK_TARGET}" \
327
+ --output-config "${OUTPUT_CONFIG}" \
328
+ --ai-workload-config-identifier "${WORKLOAD_CONFIG_NAME}" \
329
+ --role-arn "${ROLE_ARN}" \
330
+ --region "${AWS_REGION}"; then
331
+ echo "❌ Failed to create AI Benchmark Job"
332
+ echo " Check that:"
333
+ echo " • The execution role has sagemaker:CreateAIBenchmarkJob permission"
334
+ echo " • The endpoint and inference component are valid"
335
+ echo " • The S3 output path is accessible: ${BENCHMARK_S3_OUTPUT_PATH}"
336
+ exit 1
337
+ fi
338
+
339
+ echo "✅ Benchmark job created: ${BENCHMARK_JOB_NAME}"
340
+
341
+ # Save job name to do/config for idempotency on re-run
342
+ _update_benchmark_var "BENCHMARK_JOB_NAME" "${BENCHMARK_JOB_NAME}"
343
+
344
+ echo ""
345
+
346
+ fi # end of RESUME_EXISTING=false block
347
+
348
+ # ── Step 3: Poll for completion ───────────────────────────────────────────────
349
+ # Poll describe-ai-benchmark-job every POLL_INTERVAL seconds until terminal state.
350
+ # Terminal states: Completed, Failed, Stopped
351
+
352
+ # Skip polling if we already know the job completed (resumed a finished job)
353
+ if [ "${JOB_STATUS:-}" != "Completed" ] && [ "${JOB_STATUS:-}" != "Failed" ] && [ "${JOB_STATUS:-}" != "Stopped" ]; then
354
+
355
+ echo "⏳ Step 3: Waiting for benchmark to complete..."
356
+ echo " Polling every ${POLL_INTERVAL}s (max ${MAX_POLL_ATTEMPTS} attempts = 30 min)"
357
+ echo ""
358
+
359
+ POLL_COUNT=0
360
+ JOB_STATUS=""
361
+
362
+ while [ ${POLL_COUNT} -lt ${MAX_POLL_ATTEMPTS} ]; do
363
+ JOB_STATUS=$(aws sagemaker describe-ai-benchmark-job \
364
+ --ai-benchmark-job-name "${BENCHMARK_JOB_NAME}" \
365
+ --region "${AWS_REGION}" \
366
+ --query 'AIBenchmarkJobStatus' \
367
+ --output text 2>/dev/null) || {
368
+ echo "⚠️ Failed to describe benchmark job (credentials may have expired)"
369
+ echo " Re-run to check status manually:"
370
+ echo " aws sagemaker describe-ai-benchmark-job --ai-benchmark-job-name ${BENCHMARK_JOB_NAME} --region ${AWS_REGION}"
371
+ exit 1
372
+ }
373
+
374
+ case "${JOB_STATUS}" in
375
+ Completed)
376
+ echo "✅ Benchmark completed successfully!"
377
+ break
378
+ ;;
379
+ Failed)
380
+ echo "❌ Benchmark job failed"
381
+ break
382
+ ;;
383
+ Stopped)
384
+ echo "⚠️ Benchmark job was stopped"
385
+ break
386
+ ;;
387
+ *)
388
+ POLL_COUNT=$((POLL_COUNT + 1))
389
+ ELAPSED=$((POLL_COUNT * POLL_INTERVAL))
390
+ echo " $(date +%H:%M:%S) Status: ${JOB_STATUS} (${ELAPSED}s elapsed)"
391
+ sleep ${POLL_INTERVAL}
392
+ ;;
393
+ esac
394
+ done
395
+
396
+ # Check for timeout
397
+ if [ ${POLL_COUNT} -ge ${MAX_POLL_ATTEMPTS} ]; then
398
+ echo ""
399
+ echo "⚠️ Benchmark timed out after 30 minutes (status: ${JOB_STATUS})"
400
+ echo " The job may still be running. Re-run ./do/benchmark to resume waiting."
401
+ echo " Or check status manually:"
402
+ echo " aws sagemaker describe-ai-benchmark-job --ai-benchmark-job-name ${BENCHMARK_JOB_NAME} --region ${AWS_REGION}"
403
+ exit 1
404
+ fi
405
+
406
+ fi # end of polling conditional
407
+
408
+ echo ""
409
+
410
+ # ── Step 4: Display results ───────────────────────────────────────────────────
411
+ if [ "${JOB_STATUS}" = "Completed" ]; then
412
+ # Persist results locally to benchmarks/<job-name>/
413
+ PROJECT_ROOT="${SCRIPT_DIR}/.."
414
+ LOCAL_RESULTS_DIR="${PROJECT_ROOT}/benchmarks/${BENCHMARK_JOB_NAME}"
415
+ RESULTS_FILE="${LOCAL_RESULTS_DIR}/results.json"
416
+
417
+ # Check if results already exist locally (idempotency: skip S3 download)
418
+ if [ -f "${RESULTS_FILE}" ]; then
419
+ echo "📥 Step 4: Results already available locally"
420
+ RESULTS_DOWNLOADED=true
421
+ else
422
+ echo "📥 Step 4: Downloading benchmark results..."
423
+
424
+ RESULTS_S3_PATH=$(aws sagemaker describe-ai-benchmark-job \
425
+ --ai-benchmark-job-name "${BENCHMARK_JOB_NAME}" \
426
+ --region "${AWS_REGION}" \
427
+ --query 'OutputConfig.S3OutputLocation' \
428
+ --output text 2>/dev/null)
429
+
430
+ # Create local benchmarks directory
431
+ mkdir -p "${LOCAL_RESULTS_DIR}"
432
+
433
+ # The benchmark service writes results into a subdirectory (e.g., bmk-prod-<job>-<hash>/)
434
+ # under the S3OutputLocation. We use multiple strategies to locate the results file.
435
+ RESULTS_DOWNLOADED=false
436
+
437
+ # Ensure RESULTS_S3_PATH has a trailing slash for consistent path joining
438
+ RESULTS_S3_PATH="${RESULTS_S3_PATH%/}/"
439
+
440
+ # Strategy 1: Sync the entire output tree locally, then find results
441
+ # This is the most reliable approach — handles any subdirectory structure
442
+ echo " Syncing results from S3..."
443
+ if aws s3 sync "${RESULTS_S3_PATH}" "${LOCAL_RESULTS_DIR}/" --region "${AWS_REGION}" 2>/dev/null; then
444
+ # Look for any JSON file in the synced directory tree
445
+ FOUND_FILE=$(find "${LOCAL_RESULTS_DIR}" -name "*.json" -type f 2>/dev/null | head -1)
446
+ if [ -n "${FOUND_FILE}" ]; then
447
+ # If the found file isn't already at our canonical path, copy it there
448
+ if [ "${FOUND_FILE}" != "${RESULTS_FILE}" ]; then
449
+ cp "${FOUND_FILE}" "${RESULTS_FILE}"
450
+ fi
451
+ RESULTS_DOWNLOADED=true
452
+ fi
453
+ fi
454
+
455
+ # Strategy 2: If sync found nothing, try listing and downloading individual files
456
+ # This handles cases where s3 sync silently fails (permissions, empty prefix match)
457
+ if [ "${RESULTS_DOWNLOADED}" = false ]; then
458
+ echo " Searching for results files..."
459
+ RESULTS_BUCKET=$(echo "${RESULTS_S3_PATH}" | sed 's|s3://||' | cut -d'/' -f1)
460
+ RESULTS_PREFIX=$(echo "${RESULTS_S3_PATH}" | sed "s|s3://${RESULTS_BUCKET}/||")
461
+
462
+ # List all objects under the output path and find data files
463
+ # aws s3api list-objects-v2 is more reliable than aws s3 ls --recursive
464
+ FOUND_KEY=$(aws s3api list-objects-v2 \
465
+ --bucket "${RESULTS_BUCKET}" \
466
+ --prefix "${RESULTS_PREFIX}" \
467
+ --region "${AWS_REGION}" \
468
+ --query 'Contents[].Key' \
469
+ --output text 2>/dev/null \
470
+ | tr '\t' '\n' \
471
+ | grep -E '\.(json|jsonl|csv)$' \
472
+ | head -1)
473
+
474
+ if [ -n "${FOUND_KEY}" ] && [ "${FOUND_KEY}" != "None" ]; then
475
+ if aws s3 cp "s3://${RESULTS_BUCKET}/${FOUND_KEY}" "${RESULTS_FILE}" --region "${AWS_REGION}" 2>/dev/null; then
476
+ RESULTS_DOWNLOADED=true
477
+ fi
478
+ fi
479
+ fi
480
+
481
+ # Strategy 3: If still nothing, try direct path patterns the service might use
482
+ if [ "${RESULTS_DOWNLOADED}" = false ]; then
483
+ for PATTERN in "results.json" "benchmark_results.json" "output.json"; do
484
+ if aws s3 cp "${RESULTS_S3_PATH}${PATTERN}" "${RESULTS_FILE}" --region "${AWS_REGION}" 2>/dev/null; then
485
+ RESULTS_DOWNLOADED=true
486
+ break
487
+ fi
488
+ done
489
+ fi
490
+ fi
491
+
492
+ if [ "${RESULTS_DOWNLOADED}" = true ]; then
493
+ echo "✅ Results downloaded"
494
+ echo ""
495
+
496
+ # Display summary table
497
+ echo "╔══════════════════════════════════════════════════════════════════╗"
498
+ echo "║ SageMaker AI Benchmark Results ║"
499
+ echo "╠══════════════════════════════════════════════════════════════════╣"
500
+ echo "║ Job: ${BENCHMARK_JOB_NAME}"
501
+ echo "║ Endpoint: ${ENDPOINT_NAME}"
502
+ echo "╠══════════════════════════════════════════════════════════════════╣"
503
+
504
+ # Parse and display metrics using built-in tools
505
+ # Extract key metrics from the results JSON
506
+ if command -v python3 &>/dev/null; then
507
+ python3 -c "
508
+ import json, sys
509
+
510
+ try:
511
+ with open('${RESULTS_FILE}') as f:
512
+ data = json.load(f)
513
+
514
+ metrics = data if isinstance(data, dict) else {}
515
+
516
+ # Helper to safely get nested values
517
+ def get_metric(d, *keys):
518
+ for k in keys:
519
+ if isinstance(d, dict):
520
+ d = d.get(k, 'N/A')
521
+ else:
522
+ return 'N/A'
523
+ return d
524
+
525
+ # Display throughput
526
+ throughput = get_metric(metrics, 'request_throughput')
527
+ output_throughput = get_metric(metrics, 'output_token_throughput')
528
+ print(f'║ Request Throughput: {throughput} req/s')
529
+ print(f'║ Output Token Throughput: {output_throughput} tokens/s')
530
+ print('║')
531
+
532
+ # Display request latency
533
+ lat_p50 = get_metric(metrics, 'request_latency', 'p50')
534
+ lat_p90 = get_metric(metrics, 'request_latency', 'p90')
535
+ lat_p99 = get_metric(metrics, 'request_latency', 'p99')
536
+ print(f'║ Request Latency (ms):')
537
+ print(f'║ P50: {lat_p50} P90: {lat_p90} P99: {lat_p99}')
538
+ print('║')
539
+
540
+ # Display TTFT (time to first token)
541
+ ttft_p50 = get_metric(metrics, 'time_to_first_token', 'p50')
542
+ ttft_p90 = get_metric(metrics, 'time_to_first_token', 'p90')
543
+ ttft_p99 = get_metric(metrics, 'time_to_first_token', 'p99')
544
+ print(f'║ Time to First Token (ms):')
545
+ print(f'║ P50: {ttft_p50} P90: {ttft_p90} P99: {ttft_p99}')
546
+ print('║')
547
+
548
+ # Display ITL (inter-token latency)
549
+ itl_p50 = get_metric(metrics, 'inter_token_latency', 'p50')
550
+ itl_p90 = get_metric(metrics, 'inter_token_latency', 'p90')
551
+ itl_p99 = get_metric(metrics, 'inter_token_latency', 'p99')
552
+ print(f'║ Inter-Token Latency (ms):')
553
+ print(f'║ P50: {itl_p50} P90: {itl_p90} P99: {itl_p99}')
554
+
555
+ except Exception as e:
556
+ print(f'║ ⚠️ Could not parse results: {e}')
557
+ print(f'║ Raw file: ${RESULTS_FILE}')
558
+ "
559
+ else
560
+ # Fallback: display raw JSON if python3 is not available
561
+ echo "║ (python3 not available — showing raw results)"
562
+ echo "║"
563
+ cat "${RESULTS_FILE}" | head -50
564
+ fi
565
+
566
+ echo "╚══════════════════════════════════════════════════════════════════╝"
567
+ echo ""
568
+ echo "📁 Results saved to: benchmarks/${BENCHMARK_JOB_NAME}/"
569
+ echo "☁️ S3 results: ${RESULTS_S3_PATH:-${BENCHMARK_S3_OUTPUT_PATH}}"
570
+ else
571
+ echo "⚠️ Could not download results from S3"
572
+ echo " The benchmark completed but results could not be located."
573
+ echo ""
574
+ echo " Debug — list objects at the output path:"
575
+ echo " aws s3 ls ${RESULTS_S3_PATH} --recursive --region ${AWS_REGION}"
576
+ echo ""
577
+ echo " Or list via API:"
578
+ RESULTS_BUCKET=$(echo "${RESULTS_S3_PATH}" | sed 's|s3://||' | cut -d'/' -f1)
579
+ RESULTS_PREFIX=$(echo "${RESULTS_S3_PATH}" | sed "s|s3://${RESULTS_BUCKET}/||")
580
+ echo " aws s3api list-objects-v2 --bucket ${RESULTS_BUCKET} --prefix ${RESULTS_PREFIX} --region ${AWS_REGION}"
581
+ echo ""
582
+ # Show what's actually there to help debug
583
+ echo " Objects found at output path:"
584
+ aws s3api list-objects-v2 \
585
+ --bucket "${RESULTS_BUCKET}" \
586
+ --prefix "${RESULTS_PREFIX}" \
587
+ --region "${AWS_REGION}" \
588
+ --query 'Contents[].{Key: Key, Size: Size}' \
589
+ --output table 2>/dev/null || echo " (could not list objects)"
590
+ fi
591
+
592
+ elif [ "${JOB_STATUS}" = "Failed" ]; then
593
+ # Display failure reason
594
+ echo "❌ Step 4: Benchmark job failed"
595
+ FAILURE_REASON=$(aws sagemaker describe-ai-benchmark-job \
596
+ --ai-benchmark-job-name "${BENCHMARK_JOB_NAME}" \
597
+ --region "${AWS_REGION}" \
598
+ --query 'FailureReason' \
599
+ --output text 2>/dev/null)
600
+ echo " Reason: ${FAILURE_REASON}"
601
+ echo ""
602
+ echo " Debug:"
603
+ echo " aws sagemaker describe-ai-benchmark-job --ai-benchmark-job-name ${BENCHMARK_JOB_NAME} --region ${AWS_REGION}"
604
+
605
+ elif [ "${JOB_STATUS}" = "Stopped" ]; then
606
+ echo "⚠️ Step 4: Benchmark job was stopped before completion"
607
+ echo " No results available."
608
+ fi
609
+
610
+ # ── Optional cleanup (--clean flag) ───────────────────────────────────────────
611
+ # Delete workload config and benchmark job to avoid resource accumulation.
612
+ if [ "${CLEAN_AFTER}" = true ]; then
613
+ echo ""
614
+ echo "🧹 Cleaning up benchmark resources (--clean)..."
615
+
616
+ # Delete workload config
617
+ if aws sagemaker delete-ai-workload-config \
618
+ --ai-workload-config-name "${WORKLOAD_CONFIG_NAME}" \
619
+ --region "${AWS_REGION}" 2>/dev/null; then
620
+ echo " ✓ Deleted workload config: ${WORKLOAD_CONFIG_NAME}"
621
+ else
622
+ echo " ⚠️ Could not delete workload config: ${WORKLOAD_CONFIG_NAME}"
623
+ fi
624
+
625
+ # Delete benchmark job (must be in terminal state)
626
+ if aws sagemaker delete-ai-benchmark-job \
627
+ --ai-benchmark-job-name "${BENCHMARK_JOB_NAME}" \
628
+ --region "${AWS_REGION}" 2>/dev/null; then
629
+ echo " ✓ Deleted benchmark job: ${BENCHMARK_JOB_NAME}"
630
+ else
631
+ echo " ⚠️ Could not delete benchmark job: ${BENCHMARK_JOB_NAME}"
632
+ fi
633
+
634
+ echo "✅ Cleanup complete"
635
+ fi
636
+
637
+ echo ""
638
+ echo "📋 Summary:"
639
+ echo " Workload Config: ${WORKLOAD_CONFIG_NAME}"
640
+ echo " Benchmark Job: ${BENCHMARK_JOB_NAME}"
641
+ echo " Status: ${JOB_STATUS}"
642
+ echo ""
643
+ if [ "${CLEAN_AFTER}" = false ]; then
644
+ echo "🧹 To clean up benchmark resources:"
645
+ echo "   ./do/benchmark --clean"
646
+ fi