@aws/ml-container-creator 0.13.4 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/README.md +23 -5
  2. package/config/parameter-schema-v2.json +32 -4
  3. package/infra/ci-harness/lib/ci-harness-stack.ts +13 -5
  4. package/infra/ci-harness/package-lock.json +122 -116
  5. package/infra/ci-harness/package.json +1 -1
  6. package/package.json +5 -3
  7. package/pyproject.toml +21 -0
  8. package/requirements.txt +19 -0
  9. package/servers/instance-sizer/index.js +72 -4
  10. package/servers/instance-sizer/lib/model-resolver.js +28 -2
  11. package/src/app.js +17 -0
  12. package/src/lib/bootstrap-command-handler.js +33 -23
  13. package/src/lib/config-loader.js +18 -0
  14. package/src/lib/config-manager.js +6 -1
  15. package/src/lib/dataset-slug.js +152 -0
  16. package/src/lib/generated/cli-options.js +9 -3
  17. package/src/lib/generated/parameter-matrix.js +14 -3
  18. package/src/lib/generated/validation-rules.js +1 -1
  19. package/src/lib/mcp-query-runner.js +6 -0
  20. package/src/lib/prompt-runner.js +5 -0
  21. package/src/lib/prompts/feature-prompts.js +1 -1
  22. package/src/lib/template-manager.js +0 -7
  23. package/src/lib/template-variable-resolver.js +51 -1
  24. package/src/lib/tune-config-state.js +14 -1
  25. package/templates/do/.adapter_helper.py +451 -0
  26. package/templates/do/.benchmark_writer.py +22 -0
  27. package/templates/do/.register_helper.py +1163 -0
  28. package/templates/do/.stage_helper.py +419 -0
  29. package/templates/do/.tune_helper.py +379 -65
  30. package/templates/do/__pycache__/.adapter_helper.cpython-312.pyc +0 -0
  31. package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
  32. package/templates/do/__pycache__/.register_helper.cpython-312.pyc +0 -0
  33. package/templates/do/__pycache__/.tune_helper.cpython-312.pyc +0 -0
  34. package/templates/do/adapter +427 -27
  35. package/templates/do/add-ic +85 -3
  36. package/templates/do/benchmark +173 -15
  37. package/templates/do/config +24 -0
  38. package/templates/do/lib/inference-component.sh +56 -3
  39. package/templates/do/lib/profile.sh +5 -0
  40. package/templates/do/register +552 -6
  41. package/templates/do/stage +91 -272
  42. package/templates/do/test +12 -2
  43. package/templates/do/tune +264 -12
@@ -123,6 +123,71 @@ if [ -n "${MODEL_DATA}" ]; then
123
123
  fi
124
124
  echo ""
125
125
 
126
+ # ============================================================
127
+ # Query model registry for available versions (AC-5.1)
128
+ # Non-intrusive: if registry query fails, skip silently
129
+ # ============================================================
130
+ REGISTRY_MODELS_JSON=""
131
+ REGISTRY_MODEL_COUNT=0
132
+ REGISTRY_SELECTED_MODEL_DATA=""
133
+ REGISTRY_SELECTED_IMAGE=""
134
+
135
+ if [ -z "${MODEL_DATA}" ]; then
136
+ # Only query registry if MODEL_DATA was not already provided via --from-tune or --model-data
137
+ if [ -t 0 ]; then
138
+ # Interactive mode: query registry for available models
139
+ _registry_json=$(python3 "${SCRIPT_DIR}/.register_helper.py" list-models \
140
+ --project-name "${PROJECT_NAME}" \
141
+ --region "${AWS_REGION:-us-east-1}" 2>/dev/null || echo "")
142
+
143
+ _registry_line=$(echo "${_registry_json}" | grep -E '^\{' | tail -1)
144
+
145
+ if [ -n "${_registry_line}" ]; then
146
+ REGISTRY_MODEL_COUNT=$(echo "${_registry_line}" | python3 -c "import sys,json; data=json.loads(sys.stdin.read()); print(len(data.get('models',[])))" 2>/dev/null || echo "0")
147
+
148
+ if [ "${REGISTRY_MODEL_COUNT}" -gt 0 ]; then
149
+ REGISTRY_MODELS_JSON="${_registry_line}"
150
+
151
+ echo "📦 Available registered models:"
152
+ echo ""
153
+ printf ' %-4s%-10s%-12s%-38s%s\n' "#" "VERSION" "CONFIG" "MODEL" "INSTANCE"
154
+
155
+ _i=0
156
+ while [ "${_i}" -lt "${REGISTRY_MODEL_COUNT}" ]; do
157
+ _v=$(echo "${REGISTRY_MODELS_JSON}" | python3 -c "import sys,json; data=json.loads(sys.stdin.read()); print(data['models'][${_i}].get('version','?'))" 2>/dev/null)
158
+ _c=$(echo "${REGISTRY_MODELS_JSON}" | python3 -c "import sys,json; data=json.loads(sys.stdin.read()); print(data['models'][${_i}].get('deploymentConfig','?'))" 2>/dev/null)
159
+ _m=$(echo "${REGISTRY_MODELS_JSON}" | python3 -c "import sys,json; data=json.loads(sys.stdin.read()); m=data['models'][${_i}].get('modelName','?'); print(m[:36]+'…' if len(m)>36 else m)" 2>/dev/null)
160
+ _inst=$(echo "${REGISTRY_MODELS_JSON}" | python3 -c "import sys,json; data=json.loads(sys.stdin.read()); print(data['models'][${_i}].get('instanceType','?'))" 2>/dev/null)
161
+
162
+ _num=$((_i + 1))
163
+ printf ' %-4s%-10s%-12s%-38s%s\n' "${_num}" "v${_v}" "${_c}" "${_m}" "${_inst}"
164
+ _i=$((_i + 1))
165
+ done
166
+
167
+ echo ""
168
+ read -p "Select a model (1-${REGISTRY_MODEL_COUNT}) or press Enter to specify manually: " _selection
169
+
170
+ if [ -n "${_selection}" ]; then
171
+ # Validate selection
172
+ if echo "${_selection}" | grep -qE '^[0-9]+$' && [ "${_selection}" -ge 1 ] && [ "${_selection}" -le "${REGISTRY_MODEL_COUNT}" ]; then
173
+ _sel_idx=$((_selection - 1))
174
+ REGISTRY_SELECTED_MODEL_DATA=$(echo "${REGISTRY_MODELS_JSON}" | python3 -c "import sys,json; data=json.loads(sys.stdin.read()); print(data['models'][${_sel_idx}].get('modelDataUrl',''))" 2>/dev/null || echo "")
175
+ REGISTRY_SELECTED_IMAGE=$(echo "${REGISTRY_MODELS_JSON}" | python3 -c "import sys,json; data=json.loads(sys.stdin.read()); img=data['models'][${_sel_idx}].get('containerImage',''); print(img.split('/')[-1] if '/' in img else img)" 2>/dev/null || echo "")
176
+
177
+ echo ""
178
+ echo "✅ Selected registry model v$(echo "${REGISTRY_MODELS_JSON}" | python3 -c "import sys,json; data=json.loads(sys.stdin.read()); print(data['models'][${_sel_idx}].get('version','?'))" 2>/dev/null)"
179
+ echo ""
180
+ else
181
+ echo " ⚠️ Invalid selection, proceeding with manual entry."
182
+ echo ""
183
+ fi
184
+ fi
185
+ # If user pressed Enter without selection, proceed with manual entry
186
+ fi
187
+ fi
188
+ fi
189
+ fi
190
+
126
191
  # ============================================================
127
192
  # Prompt for IC name (if not provided as argument)
128
193
  # ============================================================
@@ -169,12 +234,29 @@ else
169
234
  fi
170
235
 
171
236
  # ============================================================
172
- # Prompt for image tag
237
+ # Prompt for image tag (AC-5.2, AC-5.3: pre-fill from registry, user can override)
173
238
  # ============================================================
174
- DEFAULT_IMAGE_TAG="${PROJECT_NAME}-latest"
239
+ if [ -n "${REGISTRY_SELECTED_IMAGE}" ]; then
240
+ DEFAULT_IMAGE_TAG="${REGISTRY_SELECTED_IMAGE}"
241
+ else
242
+ DEFAULT_IMAGE_TAG="${PROJECT_NAME}-latest"
243
+ fi
175
244
  read -p "Image tag [${DEFAULT_IMAGE_TAG}]: " IC_IMAGE_TAG
176
245
  IC_IMAGE_TAG="${IC_IMAGE_TAG:-${DEFAULT_IMAGE_TAG}}"
177
246
 
247
+ # ============================================================
248
+ # Prompt for model data URL (AC-5.2, AC-5.3: pre-fill from registry, user can override)
249
+ # ============================================================
250
+ if [ -z "${MODEL_DATA}" ] && [ -n "${REGISTRY_SELECTED_MODEL_DATA}" ]; then
251
+ # Pre-fill from registry selection — user can override (AC-5.3)
252
+ read -p "Model data URL [${REGISTRY_SELECTED_MODEL_DATA}]: " _model_data_input
253
+ MODEL_DATA="${_model_data_input:-${REGISTRY_SELECTED_MODEL_DATA}}"
254
+ elif [ -z "${MODEL_DATA}" ]; then
255
+ # No registry selection — offer manual entry (optional)
256
+ read -p "Model data URL (S3 URI, optional — press Enter to skip): " _model_data_input
257
+ MODEL_DATA="${_model_data_input:-}"
258
+ fi
259
+
178
260
  # ============================================================
179
261
  # Prompt for GPU count
180
262
  # ============================================================
@@ -232,7 +314,7 @@ export IC_MIN_MEMORY_MB=${IC_MIN_MEMORY_MB}
232
314
  export IC_STARTUP_TIMEOUT=900
233
315
  EOF
234
316
 
235
- # Add model data if provided (from --from-tune or --model-data)
317
+ # Add model data if provided (from --from-tune, --model-data, or registry selection)
236
318
  if [ -n "${MODEL_DATA}" ]; then
237
319
  cat >> "${IC_CONF_PATH}" <<EOF
238
320
  export IC_MODEL_DATA="${MODEL_DATA}"
@@ -12,10 +12,12 @@ set -o pipefail
12
12
  # ── Source project configuration ──────────────────────────────────────────────
13
13
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
14
14
  source "${SCRIPT_DIR}/config"
15
+ source "${SCRIPT_DIR}/lib/profile.sh"
15
16
 
16
17
  # ── Parse flags ───────────────────────────────────────────────────────────────
17
18
  CLEAN_AFTER=false
18
19
  FORCE=false
20
+ ARG_STATUS=false
19
21
  IC_ARG=""
20
22
  ADAPTER_ARG=""
21
23
  ARG_NO_STALE_WARNING=false
@@ -24,18 +26,20 @@ while [ $# -gt 0 ]; do
24
26
  case "$1" in
25
27
  --clean) CLEAN_AFTER=true; shift ;;
26
28
  --force) FORCE=true; shift ;;
29
+ --status) ARG_STATUS=true; shift ;;
27
30
  --no-stale-warning) ARG_NO_STALE_WARNING=true; shift ;;
28
31
  --workload) shift; ARG_WORKLOAD="${1:-}"; shift ;;
29
32
  --ic) shift; IC_ARG="${1:-}"; shift ;;
30
33
  --adapter) shift; ADAPTER_ARG="${1:-}"; shift ;;
31
34
  --help|-h)
32
- echo "Usage: ./do/benchmark [--workload <name>] [--ic <name>] [--adapter <name>] [--force] [--clean] [--no-stale-warning]"
35
+ echo "Usage: ./do/benchmark [--workload <name>] [--status] [--ic <name>] [--adapter <name>] [--force] [--clean]"
33
36
  echo ""
34
37
  echo "Run SageMaker AI Benchmark against the deployed endpoint."
35
38
  echo ""
36
39
  echo "Options:"
40
+ echo " --status Check job status; if completed, download results + write to Athena"
37
41
  echo " --ic <name> Benchmark a specific inference component"
38
- echo " --adapter <name> Benchmark a specific LoRA adapter IC"
42
+ echo " --adapter <name> Benchmark a specific LoRA adapter (routes via adapter IC, records adapter_name in Athena)"
39
43
  echo " --force Create a new benchmark job even if one is already running"
40
44
  echo " --clean Delete workload config and benchmark job after displaying results"
41
45
  echo " --no-stale-warning Suppress schema registry staleness warning"
@@ -45,9 +49,11 @@ while [ $# -gt 0 ]; do
45
49
  echo " --ic <name> Use IC_DEPLOYED_NAME from do/ic/<name>.conf"
46
50
  echo " (no flag) Use first IC in do/ic/ alphabetically, or legacy config"
47
51
  echo ""
48
- echo "Idempotency:"
49
- echo " If a benchmark job is already in progress, re-running without --force"
50
- echo " will resume waiting for the existing job and display its results."
52
+ echo "Adapter benchmarks are differentiated from base model runs in Athena via the adapter_name column."
53
+ echo ""
54
+ echo "Status:"
55
+ echo " After interrupting a running benchmark, use --status to check completion"
56
+ echo " and trigger results download + Athena write."
51
57
  echo ""
52
58
  echo "Prerequisites:"
53
59
  echo " • Endpoint must be deployed and InService (run ./do/deploy first)"
@@ -59,6 +65,120 @@ while [ $# -gt 0 ]; do
59
65
  done
60
66
 
61
67
 
68
+ # ── Handle --status (early exit) ─────────────────────────────────────────────
69
+ # Query the tracked benchmark job, display status, and if completed:
70
+ # download results, display metrics, and write to Athena (if not already done).
71
+ if [ "${ARG_STATUS}" = true ]; then
72
+ JOB_NAME="${BENCHMARK_JOB_NAME:-}"
73
+ if [ -z "${JOB_NAME}" ]; then
74
+ echo "❌ No benchmark job tracked"
75
+ echo " Run ./do/benchmark --workload <name> to start one."
76
+ exit 1
77
+ fi
78
+
79
+ echo "📊 Benchmark Job Status"
80
+ echo ""
81
+ echo " Job: ${JOB_NAME}"
82
+
83
+ STATUS=$(aws sagemaker describe-ai-benchmark-job \
84
+ --ai-benchmark-job-name "${JOB_NAME}" \
85
+ --region "${AWS_REGION}" \
86
+ --query 'AIBenchmarkJobStatus' \
87
+ --output text 2>/dev/null) || STATUS=""
88
+
89
+ if [ -z "${STATUS}" ]; then
90
+ echo " Status: Unknown (job not found or credentials expired)"
91
+ exit 1
92
+ fi
93
+
94
+ echo " Status: ${STATUS}"
95
+
96
+ case "${STATUS}" in
97
+ Completed)
98
+ # Check if results already exist locally
99
+ PROJECT_ROOT="${SCRIPT_DIR}/.."
100
+ LOCAL_RESULTS_DIR="${PROJECT_ROOT}/benchmarks/${JOB_NAME}"
101
+ RESULTS_JSONL=$(find "${LOCAL_RESULTS_DIR}" -name "profile_export.jsonl" -type f 2>/dev/null | head -1)
102
+
103
+ if [ -z "${RESULTS_JSONL}" ]; then
104
+ echo ""
105
+ echo " 📥 Downloading results..."
106
+ RESULTS_S3_PATH=$(aws sagemaker describe-ai-benchmark-job \
107
+ --ai-benchmark-job-name "${JOB_NAME}" \
108
+ --region "${AWS_REGION}" \
109
+ --query 'OutputConfig.S3OutputLocation' \
110
+ --output text 2>/dev/null)
111
+
112
+ if [ -n "${RESULTS_S3_PATH}" ]; then
113
+ mkdir -p "${LOCAL_RESULTS_DIR}/output"
114
+ aws s3 sync "${RESULTS_S3_PATH}" "${LOCAL_RESULTS_DIR}/output/" \
115
+ --region "${AWS_REGION}" --quiet
116
+ # Untar if output.tar.gz exists
117
+ tar_file=""
118
+ tar_file=$(find "${LOCAL_RESULTS_DIR}" -name "output.tar.gz" -type f 2>/dev/null | head -1)
119
+ if [ -n "${tar_file}" ]; then
120
+ # Detect whether tar has a leading directory prefix
121
+ _tar_first=""
122
+ _tar_first=$(tar -tzf "${tar_file}" 2>/dev/null | head -1)
123
+ if echo "${_tar_first}" | grep -qE '^[^/]+/$'; then
124
+ tar -xzf "${tar_file}" --strip-components=1 -C "${LOCAL_RESULTS_DIR}/output/" 2>/dev/null || true
125
+ else
126
+ tar -xzf "${tar_file}" -C "${LOCAL_RESULTS_DIR}/output/" 2>/dev/null || true
127
+ fi
128
+ fi
129
+ # Re-search after extraction
130
+ RESULTS_JSONL=$(find "${LOCAL_RESULTS_DIR}" -name "profile_export.jsonl" -type f 2>/dev/null | head -1)
131
+ echo " ✅ Results downloaded to: benchmarks/${JOB_NAME}/"
132
+ fi
133
+ else
134
+ echo " ✅ Results already available locally"
135
+ fi
136
+
137
+ # Write to Athena if CI bucket is configured and results exist
138
+ if [ -n "${CI_BENCHMARK_RESULTS_BUCKET:-}" ]; then
139
+ _WRITER_INPUT=""
140
+ if [ -n "${RESULTS_JSONL}" ] && [ -f "${RESULTS_JSONL}" ]; then
141
+ _WRITER_INPUT="${RESULTS_JSONL}"
142
+ else
143
+ _WRITER_INPUT=$(find "${LOCAL_RESULTS_DIR}" -name "profile_export_aiperf.json" -type f 2>/dev/null | head -1)
144
+ fi
145
+
146
+ if [ -n "${_WRITER_INPUT}" ]; then
147
+ echo ""
148
+ echo " 📊 Writing to Athena..."
149
+ if python3 "$(dirname "${BASH_SOURCE[0]}")/.benchmark_writer.py" write \
150
+ --results-file "${_WRITER_INPUT}" \
151
+ --config-file "$(dirname "${BASH_SOURCE[0]}")/config" \
152
+ --project-name "${PROJECT_NAME}" \
153
+ --workload "${BENCHMARK_WORKLOAD:-manual}" \
154
+ --concurrency "${BENCHMARK_CONCURRENCY:-2}" \
155
+ --bucket "${CI_BENCHMARK_RESULTS_BUCKET}" \
156
+ --region "${AWS_REGION:-${REGION}}" \
157
+ ${ADAPTER_ARG:+--adapter-name "${ADAPTER_ARG}"}; then
158
+ echo " ✅ Results persisted to Athena"
159
+ else
160
+ echo " ⚠️ Athena write failed (non-fatal)"
161
+ fi
162
+ fi
163
+ fi
164
+ ;;
165
+ InProgress|Starting|Pending)
166
+ echo ""
167
+ echo " Job is still running. Check again with: ./do/benchmark --status"
168
+ ;;
169
+ Failed)
170
+ FAILURE_REASON=$(aws sagemaker describe-ai-benchmark-job \
171
+ --ai-benchmark-job-name "${JOB_NAME}" \
172
+ --region "${AWS_REGION}" \
173
+ --query 'FailureReason' \
174
+ --output text 2>/dev/null) || FAILURE_REASON="unknown"
175
+ echo " Reason: ${FAILURE_REASON}"
176
+ ;;
177
+ esac
178
+ exit 0
179
+ fi
180
+
181
+
62
182
  # ── Require --workload flag ───────────────────────────────────────────────────
63
183
  if [ -z "${ARG_WORKLOAD}" ]; then
64
184
  echo "❌ --workload <name> is required"
@@ -172,8 +292,11 @@ print(f's3://{bucket}/${PROJECT_NAME}/')
172
292
 
173
293
  CI_BENCHMARK_RESULTS_BUCKET=$(echo "${_PROFILE_JSON}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('ciBenchmarkResultsBucket', ''))" 2>/dev/null) || CI_BENCHMARK_RESULTS_BUCKET=""
174
294
 
175
- # Derive job names at runtime (unique per invocation)
176
- BENCHMARK_JOB_NAME="${PROJECT_NAME}-benchmark-$(date +%Y%m%d-%H%M%S)"
295
+ ROLE_ARN=$(echo "${_PROFILE_JSON}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('roleArn', ''))" 2>/dev/null) || ROLE_ARN=""
296
+
297
+ # Derive job names at runtime (unique per invocation).
298
+ # Preserve BENCHMARK_JOB_NAME if already set (from do/config or env) for resume logic.
299
+ BENCHMARK_JOB_NAME="${BENCHMARK_JOB_NAME:-${PROJECT_NAME}-benchmark-$(date +%Y%m%d-%H%M%S)}"
177
300
  BENCHMARK_WORKLOAD_CONFIG_NAME="${PROJECT_NAME}-benchmark-config-$(date +%Y%m%d-%H%M%S)"
178
301
 
179
302
  # Ensure benchmark params have defaults (in case workload catalog wasn't found)
@@ -228,7 +351,15 @@ if [ -n "${BENCHMARK_CONCURRENCY_LEVELS:-}" ] && [ -z "${_BENCHMARK_SINGLE_LEVEL
228
351
  if [ -n "${IC_ARG}" ]; then _REINVOKE_ARGS="${_REINVOKE_ARGS} --ic ${IC_ARG}"; fi
229
352
  if [ -n "${ADAPTER_ARG}" ]; then _REINVOKE_ARGS="${_REINVOKE_ARGS} --adapter ${ADAPTER_ARG}"; fi
230
353
 
231
- if "${BASH_SOURCE[0]}" ${_REINVOKE_ARGS}; then
354
+ _CHILD_EXIT=0
355
+ "${BASH_SOURCE[0]}" ${_REINVOKE_ARGS} || _CHILD_EXIT=$?
356
+
357
+ if [ ${_CHILD_EXIT} -eq 130 ]; then
358
+ # Child was interrupted (Ctrl+C) — propagate cleanly
359
+ exit 130
360
+ fi
361
+
362
+ if [ ${_CHILD_EXIT} -eq 0 ]; then
232
363
  # Copy results to aggregation directory — find the child's results
233
364
  # Try the marker file first (set by child), then fall back to ls -td
234
365
  _LATEST_JOB_DIR=""
@@ -427,7 +558,8 @@ print(f'Combined {n_metrics} concurrency level results')
427
558
  --project-name "${PROJECT_NAME}" \
428
559
  --workload "${BENCHMARK_WORKLOAD:-manual}" \
429
560
  --bucket "${CI_BENCHMARK_RESULTS_BUCKET}" \
430
- --region "${AWS_REGION:-${REGION}}"; then
561
+ --region "${AWS_REGION:-${REGION}}" \
562
+ ${ADAPTER_ARG:+--adapter-name "${ADAPTER_ARG}"}; then
431
563
  echo "✅ Multi-level benchmark results persisted to S3"
432
564
  else
433
565
  echo "⚠️ Failed to persist multi-level benchmark results to Athena (non-fatal)"
@@ -747,7 +879,10 @@ echo ""
747
879
  echo "⚙️ Step 1: Creating AI Workload Config: ${WORKLOAD_CONFIG_NAME}"
748
880
 
749
881
  # Build parameters block
750
- PARAMS_JSON="{\"prompt_input_tokens_mean\":${BENCHMARK_INPUT_TOKENS_MEAN},\"output_tokens_mean\":${BENCHMARK_OUTPUT_TOKENS_MEAN},\"concurrency\":${BENCHMARK_CONCURRENCY},\"streaming\":${BENCHMARK_STREAMING},\"tokenizer\":\"${MODEL_NAME}\""
882
+ # Use HF_MODEL_ID for tokenizer (the original HuggingFace repo ID, e.g. "Qwen/Qwen3-0.6B").
883
+ # MODEL_NAME may have been rewritten to an S3 URI by do/stage, which AIPerf can't use as a tokenizer source.
884
+ BENCHMARK_TOKENIZER="${HF_MODEL_ID:-${MODEL_NAME}}"
885
+ PARAMS_JSON="{\"prompt_input_tokens_mean\":${BENCHMARK_INPUT_TOKENS_MEAN},\"output_tokens_mean\":${BENCHMARK_OUTPUT_TOKENS_MEAN},\"concurrency\":${BENCHMARK_CONCURRENCY},\"streaming\":${BENCHMARK_STREAMING},\"tokenizer\":\"${BENCHMARK_TOKENIZER}\""
751
886
 
752
887
  # Add optional request_count if specified
753
888
  if [ -n "${BENCHMARK_REQUEST_COUNT:-}" ]; then
@@ -856,6 +991,18 @@ fi # end of RESUME_EXISTING=false block
856
991
  # Skip polling if we already know the job completed (resumed a finished job)
857
992
  if [ "${JOB_STATUS:-}" != "Completed" ] && [ "${JOB_STATUS:-}" != "Failed" ] && [ "${JOB_STATUS:-}" != "Stopped" ]; then
858
993
 
994
+ # Handle Ctrl+C during polling — exit cleanly without stopping the remote job.
995
+ _handle_benchmark_interrupt() {
996
+ echo ""
997
+ echo ""
998
+ echo "⚠️ Interrupted — job continues running in background"
999
+ echo " Job: ${BENCHMARK_JOB_NAME}"
1000
+ echo ""
1001
+ echo " Check status: aws sagemaker describe-ai-benchmark-job --ai-benchmark-job-name ${BENCHMARK_JOB_NAME} --region ${AWS_REGION}"
1002
+ exit 130
1003
+ }
1004
+ trap '_handle_benchmark_interrupt' INT
1005
+
859
1006
  echo "⏳ Step 3: Waiting for benchmark to complete..."
860
1007
  echo " Polling every ${POLL_INTERVAL}s (max ${MAX_POLL_ATTEMPTS} attempts = 30 min)"
861
1008
  echo ""
@@ -897,13 +1044,14 @@ while [ ${POLL_COUNT} -lt ${MAX_POLL_ATTEMPTS} ]; do
897
1044
  esac
898
1045
  done
899
1046
 
1047
+ trap - INT
1048
+
900
1049
  # Check for timeout
901
1050
  if [ ${POLL_COUNT} -ge ${MAX_POLL_ATTEMPTS} ]; then
902
1051
  echo ""
903
1052
  echo "⚠️ Benchmark timed out after 30 minutes (status: ${JOB_STATUS})"
904
- echo " The job may still be running. Re-run ./do/benchmark to resume waiting."
905
- echo " Or check status manually:"
906
- echo " aws sagemaker describe-ai-benchmark-job --ai-benchmark-job-name ${BENCHMARK_JOB_NAME} --region ${AWS_REGION}"
1053
+ echo " The job may still be running."
1054
+ echo " Check status: ./do/benchmark --status"
907
1055
  exit 1
908
1056
  fi
909
1057
 
@@ -949,7 +1097,16 @@ if [ "${JOB_STATUS}" = "Completed" ]; then
949
1097
  # Extract any tar.gz archives (benchmark service packages results as output.tar.gz)
950
1098
  for ARCHIVE in $(find "${LOCAL_RESULTS_DIR}" -name "*.tar.gz" -type f 2>/dev/null); do
951
1099
  ARCHIVE_DIR=$(dirname "${ARCHIVE}")
952
- tar -xzf "${ARCHIVE}" -C "${ARCHIVE_DIR}" 2>/dev/null || true
1100
+ # Detect whether tar has a leading directory prefix to strip.
1101
+ # Some AIPerf versions wrap in output/, others are flat.
1102
+ _TAR_FIRST=$(tar -tzf "${ARCHIVE}" 2>/dev/null | head -1)
1103
+ if echo "${_TAR_FIRST}" | grep -qE '^[^/]+/$'; then
1104
+ # Leading directory (e.g., "output/") — strip it
1105
+ tar -xzf "${ARCHIVE}" --strip-components=1 -C "${ARCHIVE_DIR}" 2>/dev/null || true
1106
+ else
1107
+ # Flat archive — extract as-is
1108
+ tar -xzf "${ARCHIVE}" -C "${ARCHIVE_DIR}" 2>/dev/null || true
1109
+ fi
953
1110
  done
954
1111
 
955
1112
  # Look for specific result files (priority: JSONL > aiperf JSON)
@@ -1208,7 +1365,8 @@ except Exception as e:
1208
1365
  --workload "${BENCHMARK_WORKLOAD:-manual}" \
1209
1366
  --concurrency "${BENCHMARK_CONCURRENCY}" \
1210
1367
  --bucket "${CI_BENCHMARK_RESULTS_BUCKET}" \
1211
- --region "${AWS_REGION:-${REGION}}"; then
1368
+ --region "${AWS_REGION:-${REGION}}" \
1369
+ ${ADAPTER_ARG:+--adapter-name "${ADAPTER_ARG}"}; then
1212
1370
  echo "✅ Benchmark results persisted to S3"
1213
1371
  else
1214
1372
  echo "⚠️ Failed to persist benchmark results to Athena (non-fatal)"
@@ -211,9 +211,32 @@ export <%= key %>=${<%= key %>:-<%= value %>}
211
211
  <% }); %>
212
212
  <% } %>
213
213
 
214
+ <% if (typeof icEnvVars !== 'undefined' && icEnvVars && Object.keys(icEnvVars).length > 0) { %>
215
+ # Deploy-time IC environment variables (applied at IC creation via SDK v3, not baked into image)
216
+ # Max 16 vars, max 1024 chars per key/value
217
+ # WARNING: Do not store raw secrets here. Use Secrets Manager ARN pattern instead:
218
+ # export IC_ENV_HF_TOKEN_ARN=arn:aws:secretsmanager:REGION:ACCOUNT:secret:NAME
219
+ # Runtime code resolves the ARN to the secret value (same pattern as HF_TOKEN_ARN in do/build).
220
+ <% Object.entries(icEnvVars).forEach(([key, value]) => { %>
221
+ export IC_ENV_<%= key %>=${IC_ENV_<%= key %>:-<%= value %>}
222
+ <% }); %>
223
+ <% } else if (deploymentTarget === 'realtime-inference') { %>
224
+ # ─── Deploy-time IC environment variables (uncomment to configure) ─────────────
225
+ # These are passed as the Environment field in InferenceComponent.create() at deploy time.
226
+ # They do NOT affect the Docker build — build-time env vars remain in Dockerfile via ENV.
227
+ # Max 16 vars, max 1024 chars per key/value.
228
+ # WARNING: Do not store raw secrets here. Use Secrets Manager ARN pattern instead:
229
+ # export IC_ENV_HF_TOKEN_ARN=arn:aws:secretsmanager:REGION:ACCOUNT:secret:NAME
230
+ # export IC_ENV_VLLM_MAX_MODEL_LEN=8192
231
+ # export IC_ENV_VLLM_GPU_MEMORY_UTILIZATION=0.85
232
+ <% } %>
233
+
214
234
  # Framework-specific configuration
215
235
  <% if (framework === 'transformers') { %>
216
236
  export MODEL_NAME="<%= modelName %>"
237
+ # HuggingFace Model ID — preserved even after do/stage rewrites MODEL_NAME to S3.
238
+ # Used by do/benchmark (tokenizer), do/tune (model catalog), and do/test (chat template).
239
+ export HF_MODEL_ID="<%= modelName %>"
217
240
  # Secrets Manager integration: when an ARN is configured, do-scripts resolve the
218
241
  # secret at the appropriate stage (build-time or runtime). When a plaintext value
219
242
  # is configured, it is exported directly. The _ARN suffix signals resolution is needed.
@@ -253,6 +276,7 @@ export TUNE_MODEL_ID="<%= tuneModelId %>"
253
276
 
254
277
  <% if (framework === 'diffusors') { %>
255
278
  export MODEL_NAME="<%= modelName %>"
279
+ export HF_MODEL_ID="<%= modelName %>"
256
280
  # Secrets Manager integration: when an ARN is configured, do-scripts resolve the
257
281
  # secret at the appropriate stage (build-time or runtime). When a plaintext value
258
282
  # is configured, it is exported directly. The _ARN suffix signals resolution is needed.
@@ -6,6 +6,50 @@
6
6
  # PROJECT_NAME, ENDPOINT_NAME, ECR_REPOSITORY, AWS_REGION, CONTAINER_ENV_JSON
7
7
  # Also expects _update_config_var() to be available (from wait.sh).
8
8
 
9
+ # _collect_ic_env_vars()
10
+ # Reads IC_ENV_* prefixed variables from the environment (sourced from do/config),
11
+ # strips the IC_ENV_ prefix, validates constraints, and outputs JSON key-value pairs.
12
+ # Constraints: max 16 entries, max 1024 chars per key/value.
13
+ # IC_ENV_* overrides take precedence over CONTAINER_ENV_JSON.
14
+ #
15
+ # Sets IC_ENV_OVERRIDE in the caller's scope.
16
+ _collect_ic_env_vars() {
17
+ IC_ENV_OVERRIDE=""
18
+ local ic_env_count=0
19
+
20
+ while IFS='=' read -r full_key value; do
21
+ # Skip empty lines
22
+ [ -z "${full_key}" ] && continue
23
+
24
+ local stripped_key="${full_key#IC_ENV_}"
25
+
26
+ # Validate key length (AC-3.4)
27
+ if [ ${#stripped_key} -gt 1024 ]; then
28
+ echo "⚠️ IC_ENV_${stripped_key}: key exceeds 1024 chars, skipping" >&2
29
+ continue
30
+ fi
31
+
32
+ # Validate value length (AC-3.4)
33
+ if [ ${#value} -gt 1024 ]; then
34
+ echo "⚠️ IC_ENV_${stripped_key}: value exceeds 1024 chars, skipping" >&2
35
+ continue
36
+ fi
37
+
38
+ ic_env_count=$((ic_env_count + 1))
39
+
40
+ # Max 16 env vars (AC-3.3)
41
+ if [ ${ic_env_count} -gt 16 ]; then
42
+ echo "⚠️ More than 16 IC_ENV_* variables defined. Using first 16 only." >&2
43
+ break
44
+ fi
45
+
46
+ if [ -n "${IC_ENV_OVERRIDE}" ]; then
47
+ IC_ENV_OVERRIDE="${IC_ENV_OVERRIDE},"
48
+ fi
49
+ IC_ENV_OVERRIDE="${IC_ENV_OVERRIDE}\"${stripped_key}\":\"${value}\""
50
+ done < <(env | grep "^IC_ENV_" | sort)
51
+ }
52
+
9
53
  # create_inference_component <ic_config_file>
10
54
  # Creates an inference component from a per-IC config file.
11
55
  #
@@ -17,6 +61,10 @@
17
61
  # IC_STARTUP_TIMEOUT — container startup health check timeout in seconds (default: 900)
18
62
  # IC_CONTAINER_ENV_EXTRA — optional extra env vars in "KEY":"value" format
19
63
  #
64
+ # IC_ENV_* prefixed vars from do/config are collected, validated, and passed
65
+ # as the Environment field in InferenceComponent.create() via SDK v3.
66
+ # Precedence: IC_ENV_* > IC_CONTAINER_ENV_EXTRA > CONTAINER_ENV_JSON
67
+ #
20
68
  # Multi-spec support (for heterogeneous instance pools):
21
69
  # IC_MULTI_SPEC — set to "true" to use Specifications (plural) array
22
70
  # IC_SPEC_COUNT — number of spec entries (e.g., 2)
@@ -38,6 +86,9 @@ create_inference_component() {
38
86
  # Source the IC config to get per-IC settings
39
87
  source "${ic_conf}"
40
88
 
89
+ # Collect IC_ENV_* overrides from environment (sourced from do/config)
90
+ _collect_ic_env_vars
91
+
41
92
  local ic_timestamp
42
93
  ic_timestamp=$(date +%s)
43
94
  local ic_basename
@@ -48,9 +99,11 @@ create_inference_component() {
48
99
  local container_spec="{\"Image\":\"${ECR_REPOSITORY}:${IC_IMAGE_TAG:-${PROJECT_NAME}-latest}\""
49
100
  # Always inject IC name for CW log forwarder
50
101
  local ic_env="\"INFERENCE_COMPONENT_NAME\":\"${ic_name}\""
51
- if [ -n "${CONTAINER_ENV_JSON}${IC_CONTAINER_ENV_EXTRA:-}" ]; then
52
- local env_json="${CONTAINER_ENV_JSON}"
53
- [ -n "${IC_CONTAINER_ENV_EXTRA:-}" ] && env_json="${env_json:+${env_json},}${IC_CONTAINER_ENV_EXTRA}"
102
+ # Build environment JSON with precedence: IC_ENV_* > IC_CONTAINER_ENV_EXTRA > CONTAINER_ENV_JSON
103
+ local env_json="${CONTAINER_ENV_JSON}"
104
+ [ -n "${IC_CONTAINER_ENV_EXTRA:-}" ] && env_json="${env_json:+${env_json},}${IC_CONTAINER_ENV_EXTRA}"
105
+ [ -n "${IC_ENV_OVERRIDE:-}" ] && env_json="${env_json:+${env_json},}${IC_ENV_OVERRIDE}"
106
+ if [ -n "${env_json}" ]; then
54
107
  container_spec="${container_spec},\"Environment\":{${ic_env},${env_json}}"
55
108
  else
56
109
  container_spec="${container_spec},\"Environment\":{${ic_env}}"
@@ -44,5 +44,10 @@ except:
44
44
  fi
45
45
  fi
46
46
 
47
+ # Map commonly-used profile values to the variable names scripts expect.
48
+ # Explicit env vars take precedence (${X:-...} pattern).
49
+ ROLE_ARN="${ROLE_ARN:-${_PROFILE_roleArn:-}}"
50
+ CI_BENCHMARK_RESULTS_BUCKET="${CI_BENCHMARK_RESULTS_BUCKET:-${_PROFILE_ciBenchmarkResultsBucket:-}}"
51
+
47
52
  # NOTE: set -u is NOT re-enabled here. The caller is responsible for managing
48
53
  # their own shell options.