@aws/ml-container-creator 0.13.4 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +23 -5
- package/config/parameter-schema-v2.json +32 -4
- package/infra/ci-harness/lib/ci-harness-stack.ts +13 -5
- package/infra/ci-harness/package-lock.json +122 -116
- package/infra/ci-harness/package.json +1 -1
- package/package.json +5 -3
- package/pyproject.toml +21 -0
- package/requirements.txt +19 -0
- package/servers/instance-sizer/index.js +72 -4
- package/servers/instance-sizer/lib/model-resolver.js +28 -2
- package/src/app.js +17 -0
- package/src/lib/bootstrap-command-handler.js +33 -23
- package/src/lib/config-loader.js +18 -0
- package/src/lib/config-manager.js +6 -1
- package/src/lib/dataset-slug.js +152 -0
- package/src/lib/generated/cli-options.js +9 -3
- package/src/lib/generated/parameter-matrix.js +14 -3
- package/src/lib/generated/validation-rules.js +1 -1
- package/src/lib/mcp-query-runner.js +6 -0
- package/src/lib/prompt-runner.js +5 -0
- package/src/lib/prompts/feature-prompts.js +1 -1
- package/src/lib/template-manager.js +0 -7
- package/src/lib/template-variable-resolver.js +51 -1
- package/src/lib/tune-config-state.js +14 -1
- package/templates/do/.adapter_helper.py +451 -0
- package/templates/do/.benchmark_writer.py +22 -0
- package/templates/do/.register_helper.py +1163 -0
- package/templates/do/.stage_helper.py +419 -0
- package/templates/do/.tune_helper.py +379 -65
- package/templates/do/__pycache__/.adapter_helper.cpython-312.pyc +0 -0
- package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
- package/templates/do/__pycache__/.register_helper.cpython-312.pyc +0 -0
- package/templates/do/__pycache__/.tune_helper.cpython-312.pyc +0 -0
- package/templates/do/adapter +427 -27
- package/templates/do/add-ic +85 -3
- package/templates/do/benchmark +173 -15
- package/templates/do/config +24 -0
- package/templates/do/lib/inference-component.sh +56 -3
- package/templates/do/lib/profile.sh +5 -0
- package/templates/do/register +552 -6
- package/templates/do/stage +91 -272
- package/templates/do/test +12 -2
- package/templates/do/tune +264 -12
package/templates/do/add-ic
CHANGED
|
@@ -123,6 +123,71 @@ if [ -n "${MODEL_DATA}" ]; then
|
|
|
123
123
|
fi
|
|
124
124
|
echo ""
|
|
125
125
|
|
|
126
|
+
# ============================================================
# Query model registry for available versions (AC-5.1)
# Non-intrusive: if registry query fails, skip silently
# ============================================================
# Results are left in these globals for the prompts that follow:
#   REGISTRY_MODELS_JSON          JSON line printed by .register_helper.py list-models
#   REGISTRY_MODEL_COUNT          length of its "models" array (0 when unavailable)
#   REGISTRY_SELECTED_MODEL_DATA  modelDataUrl of the chosen entry ("" when none chosen)
#   REGISTRY_SELECTED_IMAGE       containerImage of the chosen entry, text after the last "/"
REGISTRY_MODELS_JSON=""
REGISTRY_MODEL_COUNT=0
REGISTRY_SELECTED_MODEL_DATA=""
REGISTRY_SELECTED_IMAGE=""

# _registry_selection_to_index <selection> <count>
# Prints the zero-based index for a 1-based menu answer and returns 0.
# Returns 1 (printing nothing) when <selection> is not a decimal integer
# in the range 1..<count>.
_registry_selection_to_index() {
    local _raw="${1:-}"
    local _count="${2:-0}"

    [[ "${_raw}" =~ ^[0-9]+$ ]] || return 1
    # Drop leading zeros: shell arithmetic reads "08"/"09" as (invalid) octal.
    _raw="${_raw#"${_raw%%[!0]*}"}"
    [ -n "${_raw}" ] || return 1            # "0", "00", ... are below the valid range
    [ "${#_raw}" -le 18 ] || return 1       # stay inside 64-bit integer range
    [ "${_raw}" -le "${_count}" ] 2>/dev/null || return 1

    printf '%s\n' "$((_raw - 1))"
}

# _registry_field <index> <key> <default>
# Prints models[<index>].get(<key>, <default>) from REGISTRY_MODELS_JSON.
# Index and key are passed as arguments instead of being spliced into the
# Python source text.
_registry_field() {
    printf '%s\n' "${REGISTRY_MODELS_JSON}" \
        | python3 -c "import sys,json; data=json.loads(sys.stdin.read()); print(data['models'][int(sys.argv[1])].get(sys.argv[2], sys.argv[3]))" \
            "$1" "$2" "${3-}" 2>/dev/null
}

if [ -z "${MODEL_DATA}" ]; then
    # Only query registry if MODEL_DATA was not already provided via --from-tune or --model-data
    if [ -t 0 ]; then
        # Interactive mode: query registry for available models
        _registry_json=$(python3 "${SCRIPT_DIR}/.register_helper.py" list-models \
            --project-name "${PROJECT_NAME}" \
            --region "${AWS_REGION:-us-east-1}" 2>/dev/null || echo "")

        # Keep only the last output line that starts with "{". A non-matching
        # grep must not abort the script when pipefail/errexit are active.
        _registry_line=$(echo "${_registry_json}" | grep -E '^\{' | tail -1) || _registry_line=""

        if [ -n "${_registry_line}" ]; then
            REGISTRY_MODEL_COUNT=$(echo "${_registry_line}" | python3 -c "import sys,json; data=json.loads(sys.stdin.read()); print(len(data.get('models',[])))" 2>/dev/null || echo "0")
            # Anything that is not a plain non-negative integer counts as "no models".
            case "${REGISTRY_MODEL_COUNT}" in
                ''|*[!0-9]*) REGISTRY_MODEL_COUNT=0 ;;
            esac

            if [ "${REGISTRY_MODEL_COUNT}" -gt 0 ]; then
                REGISTRY_MODELS_JSON="${_registry_line}"

                echo "📦 Available registered models:"
                echo ""
                printf ' %-4s%-10s%-12s%-38s%s\n' "#" "VERSION" "CONFIG" "MODEL" "INSTANCE"

                _i=0
                while [ "${_i}" -lt "${REGISTRY_MODEL_COUNT}" ]; do
                    _v=$(_registry_field "${_i}" version "?") || _v="?"
                    _c=$(_registry_field "${_i}" deploymentConfig "?") || _c="?"
                    # Model names longer than 36 characters are cut and suffixed with an ellipsis.
                    _m=$(echo "${REGISTRY_MODELS_JSON}" | python3 -c "import sys,json; data=json.loads(sys.stdin.read()); m=data['models'][int(sys.argv[1])].get('modelName','?'); print(m[:36]+'…' if len(m)>36 else m)" "${_i}" 2>/dev/null) || _m="?"
                    _inst=$(_registry_field "${_i}" instanceType "?") || _inst="?"

                    _num=$((_i + 1))
                    printf ' %-4s%-10s%-12s%-38s%s\n' "${_num}" "v${_v}" "${_c}" "${_m}" "${_inst}"
                    _i=$((_i + 1))
                done

                echo ""
                read -r -p "Select a model (1-${REGISTRY_MODEL_COUNT}) or press Enter to specify manually: " _selection

                if [ -n "${_selection}" ]; then
                    # Validate selection
                    if _sel_idx=$(_registry_selection_to_index "${_selection}" "${REGISTRY_MODEL_COUNT}"); then
                        REGISTRY_SELECTED_MODEL_DATA=$(_registry_field "${_sel_idx}" modelDataUrl "") || REGISTRY_SELECTED_MODEL_DATA=""
                        # Only the text after the last "/" of the image reference is kept.
                        REGISTRY_SELECTED_IMAGE=$(echo "${REGISTRY_MODELS_JSON}" | python3 -c "import sys,json; data=json.loads(sys.stdin.read()); img=data['models'][int(sys.argv[1])].get('containerImage',''); print(img.split('/')[-1] if '/' in img else img)" "${_sel_idx}" 2>/dev/null || echo "")

                        echo ""
                        echo "✅ Selected registry model v$(_registry_field "${_sel_idx}" version "?")"
                        echo ""
                    else
                        echo " ⚠️ Invalid selection, proceeding with manual entry."
                        echo ""
                    fi
                fi
                # If user pressed Enter without selection, proceed with manual entry
            fi
        fi
    fi
fi
|
|
190
|
+
|
|
126
191
|
# ============================================================
|
|
127
192
|
# Prompt for IC name (if not provided as argument)
|
|
128
193
|
# ============================================================
|
|
@@ -169,12 +234,29 @@ else
|
|
|
169
234
|
fi
|
|
170
235
|
|
|
171
236
|
# ============================================================
|
|
172
|
-
# Prompt for image tag
|
|
237
|
+
# Prompt for image tag (AC-5.2, AC-5.3: pre-fill from registry, user can override)
|
|
173
238
|
# ============================================================
|
|
174
|
-
|
|
239
|
+
if [ -n "${REGISTRY_SELECTED_IMAGE}" ]; then
|
|
240
|
+
DEFAULT_IMAGE_TAG="${REGISTRY_SELECTED_IMAGE}"
|
|
241
|
+
else
|
|
242
|
+
DEFAULT_IMAGE_TAG="${PROJECT_NAME}-latest"
|
|
243
|
+
fi
|
|
175
244
|
read -p "Image tag [${DEFAULT_IMAGE_TAG}]: " IC_IMAGE_TAG
|
|
176
245
|
IC_IMAGE_TAG="${IC_IMAGE_TAG:-${DEFAULT_IMAGE_TAG}}"
|
|
177
246
|
|
|
247
|
+
# ============================================================
|
|
248
|
+
# Prompt for model data URL (AC-5.2, AC-5.3: pre-fill from registry, user can override)
|
|
249
|
+
# ============================================================
|
|
250
|
+
if [ -z "${MODEL_DATA}" ] && [ -n "${REGISTRY_SELECTED_MODEL_DATA}" ]; then
|
|
251
|
+
# Pre-fill from registry selection — user can override (AC-5.3)
|
|
252
|
+
read -p "Model data URL [${REGISTRY_SELECTED_MODEL_DATA}]: " _model_data_input
|
|
253
|
+
MODEL_DATA="${_model_data_input:-${REGISTRY_SELECTED_MODEL_DATA}}"
|
|
254
|
+
elif [ -z "${MODEL_DATA}" ]; then
|
|
255
|
+
# No registry selection — offer manual entry (optional)
|
|
256
|
+
read -p "Model data URL (S3 URI, optional — press Enter to skip): " _model_data_input
|
|
257
|
+
MODEL_DATA="${_model_data_input:-}"
|
|
258
|
+
fi
|
|
259
|
+
|
|
178
260
|
# ============================================================
|
|
179
261
|
# Prompt for GPU count
|
|
180
262
|
# ============================================================
|
|
@@ -232,7 +314,7 @@ export IC_MIN_MEMORY_MB=${IC_MIN_MEMORY_MB}
|
|
|
232
314
|
export IC_STARTUP_TIMEOUT=900
|
|
233
315
|
EOF
|
|
234
316
|
|
|
235
|
-
# Add model data if provided (from --from-tune
|
|
317
|
+
# Add model data if provided (from --from-tune, --model-data, or registry selection)
|
|
236
318
|
if [ -n "${MODEL_DATA}" ]; then
|
|
237
319
|
cat >> "${IC_CONF_PATH}" <<EOF
|
|
238
320
|
export IC_MODEL_DATA="${MODEL_DATA}"
|
package/templates/do/benchmark
CHANGED
|
@@ -12,10 +12,12 @@ set -o pipefail
|
|
|
12
12
|
# ── Source project configuration ──────────────────────────────────────────────
|
|
13
13
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
14
14
|
source "${SCRIPT_DIR}/config"
|
|
15
|
+
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
15
16
|
|
|
16
17
|
# ── Parse flags ───────────────────────────────────────────────────────────────
|
|
17
18
|
CLEAN_AFTER=false
|
|
18
19
|
FORCE=false
|
|
20
|
+
ARG_STATUS=false
|
|
19
21
|
IC_ARG=""
|
|
20
22
|
ADAPTER_ARG=""
|
|
21
23
|
ARG_NO_STALE_WARNING=false
|
|
@@ -24,18 +26,20 @@ while [ $# -gt 0 ]; do
|
|
|
24
26
|
case "$1" in
|
|
25
27
|
--clean) CLEAN_AFTER=true; shift ;;
|
|
26
28
|
--force) FORCE=true; shift ;;
|
|
29
|
+
--status) ARG_STATUS=true; shift ;;
|
|
27
30
|
--no-stale-warning) ARG_NO_STALE_WARNING=true; shift ;;
|
|
28
31
|
--workload) shift; ARG_WORKLOAD="${1:-}"; shift ;;
|
|
29
32
|
--ic) shift; IC_ARG="${1:-}"; shift ;;
|
|
30
33
|
--adapter) shift; ADAPTER_ARG="${1:-}"; shift ;;
|
|
31
34
|
--help|-h)
|
|
32
|
-
echo "Usage: ./do/benchmark [--workload <name>] [--ic <name>] [--adapter <name>] [--force] [--clean]
|
|
35
|
+
echo "Usage: ./do/benchmark [--workload <name>] [--status] [--ic <name>] [--adapter <name>] [--force] [--clean]"
|
|
33
36
|
echo ""
|
|
34
37
|
echo "Run SageMaker AI Benchmark against the deployed endpoint."
|
|
35
38
|
echo ""
|
|
36
39
|
echo "Options:"
|
|
40
|
+
echo " --status Check job status; if completed, download results + write to Athena"
|
|
37
41
|
echo " --ic <name> Benchmark a specific inference component"
|
|
38
|
-
echo " --adapter <name> Benchmark a specific LoRA adapter IC"
|
|
42
|
+
echo " --adapter <name> Benchmark a specific LoRA adapter (routes via adapter IC, records adapter_name in Athena)"
|
|
39
43
|
echo " --force Create a new benchmark job even if one is already running"
|
|
40
44
|
echo " --clean Delete workload config and benchmark job after displaying results"
|
|
41
45
|
echo " --no-stale-warning Suppress schema registry staleness warning"
|
|
@@ -45,9 +49,11 @@ while [ $# -gt 0 ]; do
|
|
|
45
49
|
echo " --ic <name> Use IC_DEPLOYED_NAME from do/ic/<name>.conf"
|
|
46
50
|
echo " (no flag) Use first IC in do/ic/ alphabetically, or legacy config"
|
|
47
51
|
echo ""
|
|
48
|
-
echo "
|
|
49
|
-
echo "
|
|
50
|
-
echo "
|
|
52
|
+
echo "Adapter benchmarks are differentiated from base model runs in Athena via the adapter_name column."
|
|
53
|
+
echo ""
|
|
54
|
+
echo "Status:"
|
|
55
|
+
echo " After interrupting a running benchmark, use --status to check completion"
|
|
56
|
+
echo " and trigger results download + Athena write."
|
|
51
57
|
echo ""
|
|
52
58
|
echo "Prerequisites:"
|
|
53
59
|
echo " • Endpoint must be deployed and InService (run ./do/deploy first)"
|
|
@@ -59,6 +65,120 @@ while [ $# -gt 0 ]; do
|
|
|
59
65
|
done
|
|
60
66
|
|
|
61
67
|
|
|
68
|
+
# ── Handle --status (early exit) ─────────────────────────────────────────────
# Query the tracked benchmark job and display its status. If it completed:
# download results (unless already present locally) and write them to Athena
# when CI_BENCHMARK_RESULTS_BUCKET is set.
#
# Reads:  ARG_STATUS, BENCHMARK_JOB_NAME, AWS_REGION, SCRIPT_DIR, PROJECT_NAME,
#         CI_BENCHMARK_RESULTS_BUCKET, BENCHMARK_WORKLOAD, BENCHMARK_CONCURRENCY,
#         ADAPTER_ARG
# Exits:  1 when no job is tracked or its status cannot be read, 0 otherwise.

# _benchmark_text_value <value>
# Prints <value>, or nothing when it is the literal "None" that
# `aws ... --output text` emits for a null/missing field.
_benchmark_text_value() {
    case "${1:-}" in
        None) ;;
        *) printf '%s' "${1:-}" ;;
    esac
}

if [ "${ARG_STATUS}" = true ]; then
    JOB_NAME="${BENCHMARK_JOB_NAME:-}"
    if [ -z "${JOB_NAME}" ]; then
        echo "❌ No benchmark job tracked"
        echo " Run ./do/benchmark --workload <name> to start one."
        exit 1
    fi

    echo "📊 Benchmark Job Status"
    echo ""
    echo " Job: ${JOB_NAME}"

    STATUS=$(aws sagemaker describe-ai-benchmark-job \
        --ai-benchmark-job-name "${JOB_NAME}" \
        --region "${AWS_REGION}" \
        --query 'AIBenchmarkJobStatus' \
        --output text 2>/dev/null) || STATUS=""
    STATUS=$(_benchmark_text_value "${STATUS}")

    if [ -z "${STATUS}" ]; then
        echo " Status: Unknown (job not found or credentials expired)"
        exit 1
    fi

    echo " Status: ${STATUS}"

    case "${STATUS}" in
        Completed)
            # Check if results already exist locally
            PROJECT_ROOT="${SCRIPT_DIR}/.."
            LOCAL_RESULTS_DIR="${PROJECT_ROOT}/benchmarks/${JOB_NAME}"
            # `|| true`: find fails when the directory does not exist yet, which
            # is the normal first-run case and must not trip pipefail/errexit.
            RESULTS_JSONL=$(find "${LOCAL_RESULTS_DIR}" -name "profile_export.jsonl" -type f 2>/dev/null | head -1) || true

            if [ -z "${RESULTS_JSONL}" ]; then
                echo ""
                echo " 📥 Downloading results..."
                RESULTS_S3_PATH=$(aws sagemaker describe-ai-benchmark-job \
                    --ai-benchmark-job-name "${JOB_NAME}" \
                    --region "${AWS_REGION}" \
                    --query 'OutputConfig.S3OutputLocation' \
                    --output text 2>/dev/null) || RESULTS_S3_PATH=""
                RESULTS_S3_PATH=$(_benchmark_text_value "${RESULTS_S3_PATH}")

                if [ -n "${RESULTS_S3_PATH}" ]; then
                    mkdir -p "${LOCAL_RESULTS_DIR}/output"
                    if aws s3 sync "${RESULTS_S3_PATH}" "${LOCAL_RESULTS_DIR}/output/" \
                        --region "${AWS_REGION}" --quiet; then
                        # Untar if output.tar.gz exists
                        tar_file=$(find "${LOCAL_RESULTS_DIR}" -name "output.tar.gz" -type f 2>/dev/null | head -1) || true
                        if [ -n "${tar_file}" ]; then
                            # Detect whether tar has a leading directory prefix
                            _tar_first=$(tar -tzf "${tar_file}" 2>/dev/null | head -1) || true
                            if echo "${_tar_first}" | grep -qE '^[^/]+/$'; then
                                tar -xzf "${tar_file}" --strip-components=1 -C "${LOCAL_RESULTS_DIR}/output/" 2>/dev/null || true
                            else
                                tar -xzf "${tar_file}" -C "${LOCAL_RESULTS_DIR}/output/" 2>/dev/null || true
                            fi
                        fi
                        # Re-search after extraction
                        RESULTS_JSONL=$(find "${LOCAL_RESULTS_DIR}" -name "profile_export.jsonl" -type f 2>/dev/null | head -1) || true
                        echo " ✅ Results downloaded to: benchmarks/${JOB_NAME}/"
                    else
                        echo " ⚠️ Failed to download results from ${RESULTS_S3_PATH}" >&2
                    fi
                else
                    echo " ⚠️ Job reports no S3 output location; nothing to download" >&2
                fi
            else
                echo " ✅ Results already available locally"
            fi

            # Write to Athena if CI bucket is configured and results exist
            if [ -n "${CI_BENCHMARK_RESULTS_BUCKET:-}" ]; then
                _WRITER_INPUT=""
                if [ -n "${RESULTS_JSONL}" ] && [ -f "${RESULTS_JSONL}" ]; then
                    _WRITER_INPUT="${RESULTS_JSONL}"
                else
                    # Fall back to the aggregated JSON export when no JSONL was found.
                    _WRITER_INPUT=$(find "${LOCAL_RESULTS_DIR}" -name "profile_export_aiperf.json" -type f 2>/dev/null | head -1) || true
                fi

                if [ -n "${_WRITER_INPUT}" ]; then
                    echo ""
                    echo " 📊 Writing to Athena..."
                    # The ${ADAPTER_ARG:+...} expansion is intentionally unquoted so it
                    # yields either two words (flag + value) or nothing at all.
                    if python3 "$(dirname "${BASH_SOURCE[0]}")/.benchmark_writer.py" write \
                        --results-file "${_WRITER_INPUT}" \
                        --config-file "$(dirname "${BASH_SOURCE[0]}")/config" \
                        --project-name "${PROJECT_NAME}" \
                        --workload "${BENCHMARK_WORKLOAD:-manual}" \
                        --concurrency "${BENCHMARK_CONCURRENCY:-2}" \
                        --bucket "${CI_BENCHMARK_RESULTS_BUCKET}" \
                        --region "${AWS_REGION:-${REGION}}" \
                        ${ADAPTER_ARG:+--adapter-name "${ADAPTER_ARG}"}; then
                        echo " ✅ Results persisted to Athena"
                    else
                        echo " ⚠️ Athena write failed (non-fatal)"
                    fi
                fi
            fi
            ;;
        InProgress|Starting|Pending)
            echo ""
            echo " Job is still running. Check again with: ./do/benchmark --status"
            ;;
        Failed)
            FAILURE_REASON=$(aws sagemaker describe-ai-benchmark-job \
                --ai-benchmark-job-name "${JOB_NAME}" \
                --region "${AWS_REGION}" \
                --query 'FailureReason' \
                --output text 2>/dev/null) || FAILURE_REASON="unknown"
            FAILURE_REASON=$(_benchmark_text_value "${FAILURE_REASON}")
            echo " Reason: ${FAILURE_REASON:-unknown}"
            ;;
    esac
    exit 0
fi
|
|
180
|
+
|
|
181
|
+
|
|
62
182
|
# ── Require --workload flag ───────────────────────────────────────────────────
|
|
63
183
|
if [ -z "${ARG_WORKLOAD}" ]; then
|
|
64
184
|
echo "❌ --workload <name> is required"
|
|
@@ -172,8 +292,11 @@ print(f's3://{bucket}/${PROJECT_NAME}/')
|
|
|
172
292
|
|
|
173
293
|
CI_BENCHMARK_RESULTS_BUCKET=$(echo "${_PROFILE_JSON}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('ciBenchmarkResultsBucket', ''))" 2>/dev/null) || CI_BENCHMARK_RESULTS_BUCKET=""
|
|
174
294
|
|
|
175
|
-
|
|
176
|
-
|
|
295
|
+
ROLE_ARN=$(echo "${_PROFILE_JSON}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('roleArn', ''))" 2>/dev/null) || ROLE_ARN=""
|
|
296
|
+
|
|
297
|
+
# Derive job names at runtime (unique per invocation).
|
|
298
|
+
# Preserve BENCHMARK_JOB_NAME if already set (from do/config or env) for resume logic.
|
|
299
|
+
BENCHMARK_JOB_NAME="${BENCHMARK_JOB_NAME:-${PROJECT_NAME}-benchmark-$(date +%Y%m%d-%H%M%S)}"
|
|
177
300
|
BENCHMARK_WORKLOAD_CONFIG_NAME="${PROJECT_NAME}-benchmark-config-$(date +%Y%m%d-%H%M%S)"
|
|
178
301
|
|
|
179
302
|
# Ensure benchmark params have defaults (in case workload catalog wasn't found)
|
|
@@ -228,7 +351,15 @@ if [ -n "${BENCHMARK_CONCURRENCY_LEVELS:-}" ] && [ -z "${_BENCHMARK_SINGLE_LEVEL
|
|
|
228
351
|
if [ -n "${IC_ARG}" ]; then _REINVOKE_ARGS="${_REINVOKE_ARGS} --ic ${IC_ARG}"; fi
|
|
229
352
|
if [ -n "${ADAPTER_ARG}" ]; then _REINVOKE_ARGS="${_REINVOKE_ARGS} --adapter ${ADAPTER_ARG}"; fi
|
|
230
353
|
|
|
231
|
-
|
|
354
|
+
_CHILD_EXIT=0
|
|
355
|
+
"${BASH_SOURCE[0]}" ${_REINVOKE_ARGS} || _CHILD_EXIT=$?
|
|
356
|
+
|
|
357
|
+
if [ ${_CHILD_EXIT} -eq 130 ]; then
|
|
358
|
+
# Child was interrupted (Ctrl+C) — propagate cleanly
|
|
359
|
+
exit 130
|
|
360
|
+
fi
|
|
361
|
+
|
|
362
|
+
if [ ${_CHILD_EXIT} -eq 0 ]; then
|
|
232
363
|
# Copy results to aggregation directory — find the child's results
|
|
233
364
|
# Try the marker file first (set by child), then fall back to ls -td
|
|
234
365
|
_LATEST_JOB_DIR=""
|
|
@@ -427,7 +558,8 @@ print(f'Combined {n_metrics} concurrency level results')
|
|
|
427
558
|
--project-name "${PROJECT_NAME}" \
|
|
428
559
|
--workload "${BENCHMARK_WORKLOAD:-manual}" \
|
|
429
560
|
--bucket "${CI_BENCHMARK_RESULTS_BUCKET}" \
|
|
430
|
-
--region "${AWS_REGION:-${REGION}}"
|
|
561
|
+
--region "${AWS_REGION:-${REGION}}" \
|
|
562
|
+
${ADAPTER_ARG:+--adapter-name "${ADAPTER_ARG}"}; then
|
|
431
563
|
echo "✅ Multi-level benchmark results persisted to S3"
|
|
432
564
|
else
|
|
433
565
|
echo "⚠️ Failed to persist multi-level benchmark results to Athena (non-fatal)"
|
|
@@ -747,7 +879,10 @@ echo ""
|
|
|
747
879
|
echo "⚙️ Step 1: Creating AI Workload Config: ${WORKLOAD_CONFIG_NAME}"
|
|
748
880
|
|
|
749
881
|
# Build parameters block
|
|
750
|
-
|
|
882
|
+
# Use HF_MODEL_ID for tokenizer (the original HuggingFace repo ID, e.g. "Qwen/Qwen3-0.6B").
|
|
883
|
+
# MODEL_NAME may have been rewritten to an S3 URI by do/stage, which AIPerf can't use as a tokenizer source.
|
|
884
|
+
BENCHMARK_TOKENIZER="${HF_MODEL_ID:-${MODEL_NAME}}"
|
|
885
|
+
PARAMS_JSON="{\"prompt_input_tokens_mean\":${BENCHMARK_INPUT_TOKENS_MEAN},\"output_tokens_mean\":${BENCHMARK_OUTPUT_TOKENS_MEAN},\"concurrency\":${BENCHMARK_CONCURRENCY},\"streaming\":${BENCHMARK_STREAMING},\"tokenizer\":\"${BENCHMARK_TOKENIZER}\""
|
|
751
886
|
|
|
752
887
|
# Add optional request_count if specified
|
|
753
888
|
if [ -n "${BENCHMARK_REQUEST_COUNT:-}" ]; then
|
|
@@ -856,6 +991,18 @@ fi # end of RESUME_EXISTING=false block
|
|
|
856
991
|
# Skip polling if we already know the job completed (resumed a finished job)
|
|
857
992
|
if [ "${JOB_STATUS:-}" != "Completed" ] && [ "${JOB_STATUS:-}" != "Failed" ] && [ "${JOB_STATUS:-}" != "Stopped" ]; then
|
|
858
993
|
|
|
994
|
+
# Handle Ctrl+C during polling — exit cleanly without stopping the remote job.
|
|
995
|
+
# _handle_benchmark_interrupt
# SIGINT handler used while polling: prints how to check on the job and exits
# with status 130. It makes no call to stop the remote job.
# Reads: BENCHMARK_JOB_NAME, AWS_REGION.
_handle_benchmark_interrupt() {
    echo ""
    echo ""
    echo "⚠️ Interrupted — job continues running in background"
    echo " Job: ${BENCHMARK_JOB_NAME}"
    echo ""
    # Point at the script's own --status flag (as the help text and the
    # timeout message do); the raw CLI call is kept as a fallback.
    echo " Check status: ./do/benchmark --status"
    echo " Or directly:  aws sagemaker describe-ai-benchmark-job --ai-benchmark-job-name ${BENCHMARK_JOB_NAME} --region ${AWS_REGION}"
    exit 130
}
|
|
1004
|
+
trap '_handle_benchmark_interrupt' INT
|
|
1005
|
+
|
|
859
1006
|
echo "⏳ Step 3: Waiting for benchmark to complete..."
|
|
860
1007
|
echo " Polling every ${POLL_INTERVAL}s (max ${MAX_POLL_ATTEMPTS} attempts = 30 min)"
|
|
861
1008
|
echo ""
|
|
@@ -897,13 +1044,14 @@ while [ ${POLL_COUNT} -lt ${MAX_POLL_ATTEMPTS} ]; do
|
|
|
897
1044
|
esac
|
|
898
1045
|
done
|
|
899
1046
|
|
|
1047
|
+
trap - INT
|
|
1048
|
+
|
|
900
1049
|
# Check for timeout
|
|
901
1050
|
if [ ${POLL_COUNT} -ge ${MAX_POLL_ATTEMPTS} ]; then
|
|
902
1051
|
echo ""
|
|
903
1052
|
echo "⚠️ Benchmark timed out after 30 minutes (status: ${JOB_STATUS})"
|
|
904
|
-
echo " The job may still be running.
|
|
905
|
-
echo "
|
|
906
|
-
echo " aws sagemaker describe-ai-benchmark-job --ai-benchmark-job-name ${BENCHMARK_JOB_NAME} --region ${AWS_REGION}"
|
|
1053
|
+
echo " The job may still be running."
|
|
1054
|
+
echo " Check status: ./do/benchmark --status"
|
|
907
1055
|
exit 1
|
|
908
1056
|
fi
|
|
909
1057
|
|
|
@@ -949,7 +1097,16 @@ if [ "${JOB_STATUS}" = "Completed" ]; then
|
|
|
949
1097
|
# Extract any tar.gz archives (benchmark service packages results as output.tar.gz)
|
|
950
1098
|
for ARCHIVE in $(find "${LOCAL_RESULTS_DIR}" -name "*.tar.gz" -type f 2>/dev/null); do
|
|
951
1099
|
ARCHIVE_DIR=$(dirname "${ARCHIVE}")
|
|
952
|
-
tar
|
|
1100
|
+
# Detect whether tar has a leading directory prefix to strip.
|
|
1101
|
+
# Some AIPerf versions wrap in output/, others are flat.
|
|
1102
|
+
_TAR_FIRST=$(tar -tzf "${ARCHIVE}" 2>/dev/null | head -1)
|
|
1103
|
+
if echo "${_TAR_FIRST}" | grep -qE '^[^/]+/$'; then
|
|
1104
|
+
# Leading directory (e.g., "output/") — strip it
|
|
1105
|
+
tar -xzf "${ARCHIVE}" --strip-components=1 -C "${ARCHIVE_DIR}" 2>/dev/null || true
|
|
1106
|
+
else
|
|
1107
|
+
# Flat archive — extract as-is
|
|
1108
|
+
tar -xzf "${ARCHIVE}" -C "${ARCHIVE_DIR}" 2>/dev/null || true
|
|
1109
|
+
fi
|
|
953
1110
|
done
|
|
954
1111
|
|
|
955
1112
|
# Look for specific result files (priority: JSONL > aiperf JSON)
|
|
@@ -1208,7 +1365,8 @@ except Exception as e:
|
|
|
1208
1365
|
--workload "${BENCHMARK_WORKLOAD:-manual}" \
|
|
1209
1366
|
--concurrency "${BENCHMARK_CONCURRENCY}" \
|
|
1210
1367
|
--bucket "${CI_BENCHMARK_RESULTS_BUCKET}" \
|
|
1211
|
-
--region "${AWS_REGION:-${REGION}}"
|
|
1368
|
+
--region "${AWS_REGION:-${REGION}}" \
|
|
1369
|
+
${ADAPTER_ARG:+--adapter-name "${ADAPTER_ARG}"}; then
|
|
1212
1370
|
echo "✅ Benchmark results persisted to S3"
|
|
1213
1371
|
else
|
|
1214
1372
|
echo "⚠️ Failed to persist benchmark results to Athena (non-fatal)"
|
package/templates/do/config
CHANGED
|
@@ -211,9 +211,32 @@ export <%= key %>=${<%= key %>:-<%= value %>}
|
|
|
211
211
|
<% }); %>
|
|
212
212
|
<% } %>
|
|
213
213
|
|
|
214
|
+
<% if (typeof icEnvVars !== 'undefined' && icEnvVars && Object.keys(icEnvVars).length > 0) { %>
|
|
215
|
+
# Deploy-time IC environment variables (applied at IC creation via SDK v3, not baked into image)
|
|
216
|
+
# Max 16 vars, max 1024 chars per key/value
|
|
217
|
+
# WARNING: Do not store raw secrets here. Use Secrets Manager ARN pattern instead:
|
|
218
|
+
# export IC_ENV_HF_TOKEN_ARN=arn:aws:secretsmanager:REGION:ACCOUNT:secret:NAME
|
|
219
|
+
# Runtime code resolves the ARN to the secret value (same pattern as HF_TOKEN_ARN in do/build).
|
|
220
|
+
<% Object.entries(icEnvVars).forEach(([key, value]) => { %>
|
|
221
|
+
export IC_ENV_<%= key %>=${IC_ENV_<%= key %>:-<%= value %>}
|
|
222
|
+
<% }); %>
|
|
223
|
+
<% } else if (deploymentTarget === 'realtime-inference') { %>
|
|
224
|
+
# ─── Deploy-time IC environment variables (uncomment to configure) ─────────────
|
|
225
|
+
# These are passed as the Environment field in InferenceComponent.create() at deploy time.
|
|
226
|
+
# They do NOT affect the Docker build — build-time env vars remain in Dockerfile via ENV.
|
|
227
|
+
# Max 16 vars, max 1024 chars per key/value.
|
|
228
|
+
# WARNING: Do not store raw secrets here. Use Secrets Manager ARN pattern instead:
|
|
229
|
+
# export IC_ENV_HF_TOKEN_ARN=arn:aws:secretsmanager:REGION:ACCOUNT:secret:NAME
|
|
230
|
+
# export IC_ENV_VLLM_MAX_MODEL_LEN=8192
|
|
231
|
+
# export IC_ENV_VLLM_GPU_MEMORY_UTILIZATION=0.85
|
|
232
|
+
<% } %>
|
|
233
|
+
|
|
214
234
|
# Framework-specific configuration
|
|
215
235
|
<% if (framework === 'transformers') { %>
|
|
216
236
|
export MODEL_NAME="<%= modelName %>"
|
|
237
|
+
# HuggingFace Model ID — preserved even after do/stage rewrites MODEL_NAME to S3.
|
|
238
|
+
# Used by do/benchmark (tokenizer), do/tune (model catalog), and do/test (chat template).
|
|
239
|
+
export HF_MODEL_ID="<%= modelName %>"
|
|
217
240
|
# Secrets Manager integration: when an ARN is configured, do-scripts resolve the
|
|
218
241
|
# secret at the appropriate stage (build-time or runtime). When a plaintext value
|
|
219
242
|
# is configured, it is exported directly. The _ARN suffix signals resolution is needed.
|
|
@@ -253,6 +276,7 @@ export TUNE_MODEL_ID="<%= tuneModelId %>"
|
|
|
253
276
|
|
|
254
277
|
<% if (framework === 'diffusors') { %>
|
|
255
278
|
export MODEL_NAME="<%= modelName %>"
|
|
279
|
+
export HF_MODEL_ID="<%= modelName %>"
|
|
256
280
|
# Secrets Manager integration: when an ARN is configured, do-scripts resolve the
|
|
257
281
|
# secret at the appropriate stage (build-time or runtime). When a plaintext value
|
|
258
282
|
# is configured, it is exported directly. The _ARN suffix signals resolution is needed.
|
|
@@ -6,6 +6,50 @@
|
|
|
6
6
|
# PROJECT_NAME, ENDPOINT_NAME, ECR_REPOSITORY, AWS_REGION, CONTAINER_ENV_JSON
|
|
7
7
|
# Also expects _update_config_var() to be available (from wait.sh).
|
|
8
8
|
|
|
9
|
+
# _ic_env_json_escape <string>
# Prints <string> with backslash, double quote, tab, CR and LF escaped so it
# can be placed between double quotes in a JSON document.
_ic_env_json_escape() {
    local s="${1-}"
    s="${s//\\/\\\\}"
    s="${s//\"/\\\"}"
    s="${s//$'\t'/\\t}"
    s="${s//$'\r'/\\r}"
    s="${s//$'\n'/\\n}"
    printf '%s' "${s}"
}

# _collect_ic_env_vars()
# Reads exported IC_ENV_* variables (as listed by `env`, in `sort` order),
# strips the IC_ENV_ prefix, validates constraints, and builds a comma-separated
# list of JSON "key":"value" pairs (no surrounding braces).
# Constraints: max 16 entries, max 1024 chars per key/value; offending entries
# are skipped with a warning on stderr.
# Keys and values are JSON-escaped; values are taken verbatim after the first
# "=", so trailing "=" characters (e.g. base64 padding) are preserved.
#
# Sets IC_ENV_OVERRIDE (a global) for the caller. Because that name itself
# matches the IC_ENV_* pattern, a variable called IC_ENV_OVERRIDE is never
# collected as an entry.
_collect_ic_env_vars() {
    IC_ENV_OVERRIDE=""
    local ic_env_count=0
    local entry full_key stripped_key value

    while IFS= read -r entry; do
        # Skip empty lines and anything that is not NAME=value
        case "${entry}" in
            *=*) ;;
            *) continue ;;
        esac

        # Split on the FIRST "=" only. `IFS='=' read key value` would silently
        # drop a single trailing "=" from the value.
        full_key="${entry%%=*}"
        value="${entry#*=}"
        stripped_key="${full_key#IC_ENV_}"

        # A bare "IC_ENV_" has no usable key; "OVERRIDE" is this function's output.
        if [ -z "${stripped_key}" ] || [ "${stripped_key}" = "OVERRIDE" ]; then
            continue
        fi

        # Validate key length (AC-3.4)
        if [ ${#stripped_key} -gt 1024 ]; then
            echo "⚠️ IC_ENV_${stripped_key}: key exceeds 1024 chars, skipping" >&2
            continue
        fi

        # Validate value length (AC-3.4)
        if [ ${#value} -gt 1024 ]; then
            echo "⚠️ IC_ENV_${stripped_key}: value exceeds 1024 chars, skipping" >&2
            continue
        fi

        ic_env_count=$((ic_env_count + 1))

        # Max 16 env vars (AC-3.3)
        if [ ${ic_env_count} -gt 16 ]; then
            echo "⚠️ More than 16 IC_ENV_* variables defined. Using first 16 only." >&2
            break
        fi

        if [ -n "${IC_ENV_OVERRIDE}" ]; then
            IC_ENV_OVERRIDE="${IC_ENV_OVERRIDE},"
        fi
        IC_ENV_OVERRIDE="${IC_ENV_OVERRIDE}\"$(_ic_env_json_escape "${stripped_key}")\":\"$(_ic_env_json_escape "${value}")\""
    done < <(env | grep "^IC_ENV_" | sort)
}
|
|
52
|
+
|
|
9
53
|
# create_inference_component <ic_config_file>
|
|
10
54
|
# Creates an inference component from a per-IC config file.
|
|
11
55
|
#
|
|
@@ -17,6 +61,10 @@
|
|
|
17
61
|
# IC_STARTUP_TIMEOUT — container startup health check timeout in seconds (default: 900)
|
|
18
62
|
# IC_CONTAINER_ENV_EXTRA — optional extra env vars in "KEY":"value" format
|
|
19
63
|
#
|
|
64
|
+
# IC_ENV_* prefixed vars from do/config are collected, validated, and passed
|
|
65
|
+
# as the Environment field in InferenceComponent.create() via SDK v3.
|
|
66
|
+
# Precedence: IC_ENV_* > IC_CONTAINER_ENV_EXTRA > CONTAINER_ENV_JSON
|
|
67
|
+
#
|
|
20
68
|
# Multi-spec support (for heterogeneous instance pools):
|
|
21
69
|
# IC_MULTI_SPEC — set to "true" to use Specifications (plural) array
|
|
22
70
|
# IC_SPEC_COUNT — number of spec entries (e.g., 2)
|
|
@@ -38,6 +86,9 @@ create_inference_component() {
|
|
|
38
86
|
# Source the IC config to get per-IC settings
|
|
39
87
|
source "${ic_conf}"
|
|
40
88
|
|
|
89
|
+
# Collect IC_ENV_* overrides from environment (sourced from do/config)
|
|
90
|
+
_collect_ic_env_vars
|
|
91
|
+
|
|
41
92
|
local ic_timestamp
|
|
42
93
|
ic_timestamp=$(date +%s)
|
|
43
94
|
local ic_basename
|
|
@@ -48,9 +99,11 @@ create_inference_component() {
|
|
|
48
99
|
local container_spec="{\"Image\":\"${ECR_REPOSITORY}:${IC_IMAGE_TAG:-${PROJECT_NAME}-latest}\""
|
|
49
100
|
# Always inject IC name for CW log forwarder
|
|
50
101
|
local ic_env="\"INFERENCE_COMPONENT_NAME\":\"${ic_name}\""
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
102
|
+
# Build environment JSON with precedence: IC_ENV_* > IC_CONTAINER_ENV_EXTRA > CONTAINER_ENV_JSON
|
|
103
|
+
local env_json="${CONTAINER_ENV_JSON}"
|
|
104
|
+
[ -n "${IC_CONTAINER_ENV_EXTRA:-}" ] && env_json="${env_json:+${env_json},}${IC_CONTAINER_ENV_EXTRA}"
|
|
105
|
+
[ -n "${IC_ENV_OVERRIDE:-}" ] && env_json="${env_json:+${env_json},}${IC_ENV_OVERRIDE}"
|
|
106
|
+
if [ -n "${env_json}" ]; then
|
|
54
107
|
container_spec="${container_spec},\"Environment\":{${ic_env},${env_json}}"
|
|
55
108
|
else
|
|
56
109
|
container_spec="${container_spec},\"Environment\":{${ic_env}}"
|
|
@@ -44,5 +44,10 @@ except:
|
|
|
44
44
|
fi
|
|
45
45
|
fi
|
|
46
46
|
|
|
47
|
+
# Map commonly-used profile values to the variable names scripts expect.
|
|
48
|
+
# Explicit env vars take precedence (${X:-...} pattern).
|
|
49
|
+
ROLE_ARN="${ROLE_ARN:-${_PROFILE_roleArn:-}}"
|
|
50
|
+
CI_BENCHMARK_RESULTS_BUCKET="${CI_BENCHMARK_RESULTS_BUCKET:-${_PROFILE_ciBenchmarkResultsBucket:-}}"
|
|
51
|
+
|
|
47
52
|
# NOTE: set -u is NOT re-enabled here. The caller is responsible for managing
|
|
48
53
|
# their own shell options.
|