@aws/ml-container-creator 0.9.1 → 0.10.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE-THIRD-PARTY +9304 -0
- package/bin/cli.js +2 -0
- package/config/bootstrap-e2e-stack.json +341 -0
- package/config/bootstrap-stack.json +40 -3
- package/config/parameter-schema-v2.json +2049 -0
- package/config/tune-catalog.json +1781 -0
- package/infra/ci-harness/buildspec.yml +1 -0
- package/infra/ci-harness/lambda/path-prover/brain.ts +306 -0
- package/infra/ci-harness/lambda/path-prover/write-results.ts +152 -0
- package/infra/ci-harness/lib/ci-harness-stack.ts +837 -7
- package/infra/ci-harness/state-machines/path-prover.asl.json +496 -0
- package/package.json +53 -68
- package/servers/base-image-picker/index.js +121 -121
- package/servers/e2e-status/index.js +297 -0
- package/servers/e2e-status/manifest.json +14 -0
- package/servers/e2e-status/package.json +15 -0
- package/servers/endpoint-picker/LICENSE +202 -0
- package/servers/endpoint-picker/index.js +536 -0
- package/servers/endpoint-picker/manifest.json +14 -0
- package/servers/endpoint-picker/package.json +18 -0
- package/servers/hyperpod-cluster-picker/index.js +125 -125
- package/servers/instance-sizer/index.js +138 -138
- package/servers/instance-sizer/lib/instance-ranker.js +76 -76
- package/servers/instance-sizer/lib/model-resolver.js +61 -61
- package/servers/instance-sizer/lib/quota-resolver.js +113 -113
- package/servers/instance-sizer/lib/vram-estimator.js +31 -31
- package/servers/lib/bedrock-client.js +38 -38
- package/servers/lib/catalogs/jumpstart-public.json +101 -16
- package/servers/lib/catalogs/model-servers.json +201 -3
- package/servers/lib/catalogs/models.json +182 -26
- package/servers/lib/custom-validators.js +13 -13
- package/servers/lib/dynamic-resolver.js +4 -4
- package/servers/marketplace-picker/index.js +342 -0
- package/servers/marketplace-picker/manifest.json +14 -0
- package/servers/marketplace-picker/package.json +18 -0
- package/servers/model-picker/index.js +382 -382
- package/servers/region-picker/index.js +56 -56
- package/servers/workload-picker/LICENSE +202 -0
- package/servers/workload-picker/catalogs/workload-profiles.json +67 -0
- package/servers/workload-picker/index.js +171 -0
- package/servers/workload-picker/manifest.json +16 -0
- package/servers/workload-picker/package.json +16 -0
- package/src/app.js +4 -390
- package/src/lib/bootstrap-command-handler.js +710 -1148
- package/src/lib/bootstrap-config.js +36 -0
- package/src/lib/bootstrap-profile-manager.js +641 -0
- package/src/lib/bootstrap-provisioners.js +421 -0
- package/src/lib/ci-register-helpers.js +74 -0
- package/src/lib/config-loader.js +408 -0
- package/src/lib/config-manager.js +66 -1685
- package/src/lib/config-mcp-client.js +118 -0
- package/src/lib/config-validator.js +634 -0
- package/src/lib/cuda-resolver.js +149 -0
- package/src/lib/e2e-catalog-validator.js +251 -3
- package/src/lib/e2e-ci-recorder.js +103 -0
- package/src/lib/generated/cli-options.js +315 -311
- package/src/lib/generated/parameter-matrix.js +671 -0
- package/src/lib/generated/validation-rules.js +71 -71
- package/src/lib/marketplace-flow.js +276 -0
- package/src/lib/mcp-query-runner.js +768 -0
- package/src/lib/parameter-schema-validator.js +62 -18
- package/src/lib/path-prover-brain.js +607 -0
- package/src/lib/prompt-runner.js +41 -1504
- package/src/lib/prompts/feature-prompts.js +172 -0
- package/src/lib/prompts/index.js +48 -0
- package/src/lib/prompts/infrastructure-prompts.js +690 -0
- package/src/lib/prompts/model-prompts.js +552 -0
- package/src/lib/prompts/project-prompts.js +82 -0
- package/src/lib/prompts.js +2 -1446
- package/src/lib/registry-command-handler.js +135 -3
- package/src/lib/secrets-prompt-runner.js +251 -0
- package/src/lib/template-variable-resolver.js +422 -0
- package/src/lib/tune-catalog-validator.js +37 -4
- package/templates/Dockerfile +9 -0
- package/templates/code/adapter_sidecar.py +444 -0
- package/templates/code/serve +6 -0
- package/templates/code/serve.d/vllm.ejs +1 -1
- package/templates/do/.benchmark_writer.py +1476 -0
- package/templates/do/.tune_helper.py +982 -57
- package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
- package/templates/do/adapter +149 -0
- package/templates/do/benchmark +639 -85
- package/templates/do/config +108 -5
- package/templates/do/deploy.d/managed-inference.ejs +192 -11
- package/templates/do/optimize +106 -37
- package/templates/do/register +89 -0
- package/templates/do/test +13 -0
- package/templates/do/tune +378 -59
- package/templates/do/validate +44 -4
- package/config/parameter-schema.json +0 -88
package/templates/do/optimize
CHANGED
|
@@ -106,6 +106,30 @@ elif [ -n "${INSTANCE_POOLS:-}" ]; then
|
|
|
106
106
|
elif [ -n "${INSTANCE_TYPE:-}" ]; then
|
|
107
107
|
INSTANCE_TYPES="${INSTANCE_TYPE}"
|
|
108
108
|
echo " Instances (from INSTANCE_TYPE): ${INSTANCE_TYPES}"
|
|
109
|
+
elif [ "${ENDPOINT_EXTERNAL:-}" = "true" ] && [ -n "${ENDPOINT_NAME:-}" ]; then
|
|
110
|
+
# External endpoint — query the live endpoint config for instance type
|
|
111
|
+
echo " Discovering instance type from external endpoint: ${ENDPOINT_NAME}"
|
|
112
|
+
ENDPOINT_CONFIG_NAME=$(aws sagemaker describe-endpoint \
|
|
113
|
+
--endpoint-name "${ENDPOINT_NAME}" \
|
|
114
|
+
--region "${AWS_REGION}" \
|
|
115
|
+
--query 'EndpointConfigName' \
|
|
116
|
+
--output text 2>/dev/null) || ENDPOINT_CONFIG_NAME=""
|
|
117
|
+
|
|
118
|
+
if [ -n "${ENDPOINT_CONFIG_NAME}" ]; then
|
|
119
|
+
INSTANCE_TYPES=$(aws sagemaker describe-endpoint-config \
|
|
120
|
+
--endpoint-config-name "${ENDPOINT_CONFIG_NAME}" \
|
|
121
|
+
--region "${AWS_REGION}" \
|
|
122
|
+
--query 'ProductionVariants[0].InstanceType' \
|
|
123
|
+
--output text 2>/dev/null) || INSTANCE_TYPES=""
|
|
124
|
+
fi
|
|
125
|
+
|
|
126
|
+
if [ -n "${INSTANCE_TYPES}" ] && [ "${INSTANCE_TYPES}" != "None" ]; then
|
|
127
|
+
echo " Instances (from endpoint): ${INSTANCE_TYPES}"
|
|
128
|
+
else
|
|
129
|
+
echo "❌ Could not discover instance type from endpoint: ${ENDPOINT_NAME}"
|
|
130
|
+
echo " Provide --instances flag, or set INSTANCE_TYPE in do/config."
|
|
131
|
+
exit 1
|
|
132
|
+
fi
|
|
109
133
|
else
|
|
110
134
|
echo "❌ No instance types available."
|
|
111
135
|
echo " Provide --instances flag, or set INSTANCE_POOLS or INSTANCE_TYPE in do/config."
|
|
@@ -132,9 +156,9 @@ RESUME_EXISTING=false
|
|
|
132
156
|
|
|
133
157
|
if [ "${FORCE}" = false ] && [ -n "${OPTIMIZE_JOB_NAME:-}" ]; then
|
|
134
158
|
EXISTING_STATUS=$(aws sagemaker describe-ai-recommendation-job \
|
|
135
|
-
--job-name "${OPTIMIZE_JOB_NAME}" \
|
|
159
|
+
--ai-recommendation-job-name "${OPTIMIZE_JOB_NAME}" \
|
|
136
160
|
--region "${AWS_REGION}" \
|
|
137
|
-
--query '
|
|
161
|
+
--query 'AIRecommendationJobStatus' \
|
|
138
162
|
--output text 2>/dev/null) || EXISTING_STATUS=""
|
|
139
163
|
|
|
140
164
|
case "${EXISTING_STATUS}" in
|
|
@@ -154,7 +178,7 @@ if [ "${FORCE}" = false ] && [ -n "${OPTIMIZE_JOB_NAME:-}" ]; then
|
|
|
154
178
|
;;
|
|
155
179
|
FAILED|STOPPED)
|
|
156
180
|
FAILURE_REASON=$(aws sagemaker describe-ai-recommendation-job \
|
|
157
|
-
--job-name "${OPTIMIZE_JOB_NAME}" \
|
|
181
|
+
--ai-recommendation-job-name "${OPTIMIZE_JOB_NAME}" \
|
|
158
182
|
--region "${AWS_REGION}" \
|
|
159
183
|
--query 'FailureReason' \
|
|
160
184
|
--output text 2>/dev/null) || FAILURE_REASON="unknown"
|
|
@@ -174,44 +198,86 @@ fi
|
|
|
174
198
|
# ── Create recommendation job ─────────────────────────────────────────────────
|
|
175
199
|
if [ "${RESUME_EXISTING}" = false ]; then
|
|
176
200
|
OPTIMIZE_JOB_NAME="${PROJECT_NAME}-optimize-$(date +%Y%m%d-%H%M%S)"
|
|
201
|
+
WORKLOAD_CONFIG_NAME="${OPTIMIZE_JOB_NAME}-workload"
|
|
177
202
|
|
|
178
203
|
echo "🚀 Creating AI Recommendation Job: ${OPTIMIZE_JOB_NAME}"
|
|
179
204
|
|
|
180
|
-
#
|
|
181
|
-
|
|
182
|
-
|
|
205
|
+
# Step 1: Create workload config
|
|
206
|
+
echo " Creating workload config: ${WORKLOAD_CONFIG_NAME}"
|
|
207
|
+
|
|
208
|
+
WORKLOAD_SPEC_INNER="{\"benchmark\": {\"type\": \"aiperf\"}, \"parameters\": {\"prompt_input_tokens_mean\": ${INPUT_TOKENS}, \"prompt_input_tokens_stddev\": 150, \"output_tokens_mean\": ${OUTPUT_TOKENS}, \"output_tokens_stddev\": 50, \"concurrency\": ${CONCURRENCY}, \"streaming\": true}}"
|
|
209
|
+
# JSON-stringify the inner spec for the "Inline" field. The value is handed to Python via argv (not spliced into the Python source) so quotes or backslashes in it cannot break or alter the program.
WORKLOAD_SPEC_OUTER="{\"WorkloadSpec\": {\"Inline\": $(python3 -c 'import json, sys; print(json.dumps(sys.argv[1]))' "${WORKLOAD_SPEC_INNER}")}}"
|
|
210
|
+
|
|
211
|
+
if ! aws sagemaker create-ai-workload-config \
|
|
212
|
+
--ai-workload-config-name "${WORKLOAD_CONFIG_NAME}" \
|
|
213
|
+
--ai-workload-configs "${WORKLOAD_SPEC_OUTER}" \
|
|
214
|
+
--region "${AWS_REGION}" 2>&1 | grep -q "AIWorkloadConfigArn"; then
|
|
215
|
+
echo "❌ Failed to create workload config: ${WORKLOAD_CONFIG_NAME}"
|
|
216
|
+
echo " Check that the execution role has sagemaker:CreateAIWorkloadConfig permission."
|
|
217
|
+
# Show actual error
|
|
218
|
+
aws sagemaker create-ai-workload-config \
|
|
219
|
+
--ai-workload-config-name "${WORKLOAD_CONFIG_NAME}" \
|
|
220
|
+
--ai-workload-configs "${WORKLOAD_SPEC_OUTER}" \
|
|
221
|
+
--region "${AWS_REGION}" 2>&1 || true
|
|
222
|
+
exit 1
|
|
223
|
+
fi
|
|
224
|
+
echo " ✅ Workload config created"
|
|
225
|
+
|
|
226
|
+
# Step 2: Build compute spec (instance types, max 3)
|
|
227
|
+
COMPUTE_SPEC_JSON=""
|
|
183
228
|
IFS=',' read -ra TYPES <<< "${INSTANCE_TYPES}"
|
|
229
|
+
INSTANCE_LIST=""
|
|
230
|
+
COUNT=0
|
|
184
231
|
for itype in "${TYPES[@]}"; do
|
|
185
232
|
itype=$(echo "${itype}" | xargs) # trim whitespace
|
|
186
|
-
if [
|
|
187
|
-
|
|
188
|
-
|
|
233
|
+
if [ ${COUNT} -ge 3 ]; then
|
|
234
|
+
echo " ⚠️ Max 3 instance types supported — truncating"
|
|
235
|
+
break
|
|
236
|
+
fi
|
|
237
|
+
if [ -n "${INSTANCE_LIST}" ]; then
|
|
238
|
+
INSTANCE_LIST="${INSTANCE_LIST},\"${itype}\""
|
|
189
239
|
else
|
|
190
|
-
|
|
240
|
+
INSTANCE_LIST="\"${itype}\""
|
|
191
241
|
fi
|
|
242
|
+
COUNT=$((COUNT + 1))
|
|
192
243
|
done
|
|
193
|
-
|
|
244
|
+
COMPUTE_SPEC_JSON="InstanceTypes=[${INSTANCE_LIST}]"
|
|
245
|
+
|
|
246
|
+
# Step 3: Map goal to performance target metric
|
|
247
|
+
case "${GOAL}" in
|
|
248
|
+
latency) PERF_METRIC="ttft-ms" ;;
|
|
249
|
+
throughput) PERF_METRIC="throughput" ;;
|
|
250
|
+
cost) PERF_METRIC="cost" ;;
|
|
251
|
+
esac
|
|
194
252
|
|
|
195
|
-
#
|
|
196
|
-
# The
|
|
197
|
-
|
|
253
|
+
# Step 4: Determine model source
|
|
254
|
+
# The recommendation API requires model artifacts as an s3:// or https:// URI.
|
|
255
|
+
MODEL_SOURCE_ARG=""
|
|
198
256
|
if [[ "${MODEL_NAME}" == s3://* ]]; then
|
|
199
|
-
|
|
257
|
+
MODEL_SOURCE_ARG="S3={S3Uri=${MODEL_NAME}}"
|
|
200
258
|
else
|
|
201
|
-
|
|
259
|
+
# HuggingFace model — use the HTTPS URL for the model on HuggingFace Hub
|
|
260
|
+
MODEL_SOURCE_ARG="S3={S3Uri=https://huggingface.co/${MODEL_NAME}}"
|
|
202
261
|
fi
|
|
203
262
|
|
|
204
|
-
#
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
--
|
|
212
|
-
--
|
|
213
|
-
--
|
|
214
|
-
--
|
|
263
|
+
# Step 5: Create the recommendation job
|
|
264
|
+
S3_OUTPUT="s3://${TUNE_S3_BUCKET:-mlcc-tune-$(aws sts get-caller-identity --query Account --output text 2>/dev/null)-${AWS_REGION}}/${PROJECT_NAME}/optimize/"
|
|
265
|
+
|
|
266
|
+
RECOMMEND_CMD=(
|
|
267
|
+
aws sagemaker create-ai-recommendation-job
|
|
268
|
+
--ai-recommendation-job-name "${OPTIMIZE_JOB_NAME}"
|
|
269
|
+
--model-source "${MODEL_SOURCE_ARG}"
|
|
270
|
+
--output-config "S3OutputLocation=${S3_OUTPUT}"
|
|
271
|
+
--ai-workload-config-identifier "${WORKLOAD_CONFIG_NAME}"
|
|
272
|
+
--performance-target "Constraints=[{Metric=${PERF_METRIC}}]"
|
|
273
|
+
--role-arn "${ROLE_ARN}"
|
|
274
|
+
--compute-spec "${COMPUTE_SPEC_JSON}"
|
|
275
|
+
--inference-specification "Framework=VLLM"
|
|
276
|
+
--region "${AWS_REGION}"
|
|
277
|
+
)
|
|
278
|
+
|
|
279
|
+
if ! "${RECOMMEND_CMD[@]}" 2>&1; then
|
|
280
|
+
echo ""
|
|
215
281
|
echo "❌ Failed to create AI Recommendation Job"
|
|
216
282
|
echo " Check that:"
|
|
217
283
|
echo " • The execution role has sagemaker:CreateAIRecommendationJob permission"
|
|
@@ -232,7 +298,7 @@ fi
|
|
|
232
298
|
POLL_INTERVAL=30
|
|
233
299
|
MAX_POLL_ATTEMPTS=120 # 60 minutes max (120 * 30s)
|
|
234
300
|
|
|
235
|
-
if [ "${JOB_STATUS:-}" != "COMPLETED" ] && [ "${JOB_STATUS:-}" != "FAILED" ] && [ "${JOB_STATUS:-}" != "STOPPED" ]; then
|
|
301
|
+
if [ "${JOB_STATUS:-}" != "COMPLETED" ] && [ "${JOB_STATUS:-}" != "FAILED" ] && [ "${JOB_STATUS:-}" != "STOPPED" ] && [ "${JOB_STATUS:-}" != "Completed" ] && [ "${JOB_STATUS:-}" != "Failed" ] && [ "${JOB_STATUS:-}" != "Stopped" ]; then
|
|
236
302
|
|
|
237
303
|
echo "⏳ Waiting for recommendation job to complete..."
|
|
238
304
|
echo " Polling every ${POLL_INTERVAL}s (max ${MAX_POLL_ATTEMPTS} attempts = 60 min)"
|
|
@@ -243,27 +309,30 @@ JOB_STATUS=""
|
|
|
243
309
|
|
|
244
310
|
while [ ${POLL_COUNT} -lt ${MAX_POLL_ATTEMPTS} ]; do
|
|
245
311
|
JOB_STATUS=$(aws sagemaker describe-ai-recommendation-job \
|
|
246
|
-
--job-name "${OPTIMIZE_JOB_NAME}" \
|
|
312
|
+
--ai-recommendation-job-name "${OPTIMIZE_JOB_NAME}" \
|
|
247
313
|
--region "${AWS_REGION}" \
|
|
248
|
-
--query '
|
|
314
|
+
--query 'AIRecommendationJobStatus' \
|
|
249
315
|
--output text 2>/dev/null) || {
|
|
250
316
|
echo "⚠️ Failed to describe recommendation job (credentials may have expired)"
|
|
251
317
|
echo " Re-run to check status:"
|
|
252
|
-
echo " aws sagemaker describe-ai-recommendation-job --job-name ${OPTIMIZE_JOB_NAME} --region ${AWS_REGION}"
|
|
318
|
+
echo " aws sagemaker describe-ai-recommendation-job --ai-recommendation-job-name ${OPTIMIZE_JOB_NAME} --region ${AWS_REGION}"
|
|
253
319
|
exit 1
|
|
254
320
|
}
|
|
255
321
|
|
|
256
322
|
case "${JOB_STATUS}" in
|
|
257
|
-
COMPLETED)
|
|
323
|
+
COMPLETED|Completed)
|
|
258
324
|
echo "✅ Recommendation job completed!"
|
|
325
|
+
JOB_STATUS="COMPLETED"
|
|
259
326
|
break
|
|
260
327
|
;;
|
|
261
|
-
FAILED)
|
|
328
|
+
FAILED|Failed)
|
|
262
329
|
echo "❌ Recommendation job failed"
|
|
330
|
+
JOB_STATUS="FAILED"
|
|
263
331
|
break
|
|
264
332
|
;;
|
|
265
|
-
STOPPED)
|
|
333
|
+
STOPPED|Stopped)
|
|
266
334
|
echo "⚠️ Recommendation job was stopped"
|
|
335
|
+
JOB_STATUS="STOPPED"
|
|
267
336
|
break
|
|
268
337
|
;;
|
|
269
338
|
*)
|
|
@@ -292,7 +361,7 @@ if [ "${JOB_STATUS}" = "COMPLETED" ]; then
|
|
|
292
361
|
|
|
293
362
|
# Get the full job description with results
|
|
294
363
|
JOB_DESCRIPTION=$(aws sagemaker describe-ai-recommendation-job \
|
|
295
|
-
--job-name "${OPTIMIZE_JOB_NAME}" \
|
|
364
|
+
--ai-recommendation-job-name "${OPTIMIZE_JOB_NAME}" \
|
|
296
365
|
--region "${AWS_REGION}" \
|
|
297
366
|
--output json 2>/dev/null) || {
|
|
298
367
|
echo "❌ Failed to fetch recommendation results"
|
|
@@ -503,7 +572,7 @@ except:
|
|
|
503
572
|
|
|
504
573
|
if [ "${FAILURE_REASON}" = "unknown" ]; then
|
|
505
574
|
FAILURE_REASON=$(aws sagemaker describe-ai-recommendation-job \
|
|
506
|
-
--job-name "${OPTIMIZE_JOB_NAME}" \
|
|
575
|
+
--ai-recommendation-job-name "${OPTIMIZE_JOB_NAME}" \
|
|
507
576
|
--region "${AWS_REGION}" \
|
|
508
577
|
--query 'FailureReason' \
|
|
509
578
|
--output text 2>/dev/null) || FAILURE_REASON="unknown"
|
|
@@ -513,7 +582,7 @@ except:
|
|
|
513
582
|
echo " Reason: ${FAILURE_REASON}"
|
|
514
583
|
echo ""
|
|
515
584
|
echo " Debug:"
|
|
516
|
-
echo " aws sagemaker describe-ai-recommendation-job --job-name ${OPTIMIZE_JOB_NAME} --region ${AWS_REGION}"
|
|
585
|
+
echo " aws sagemaker describe-ai-recommendation-job --ai-recommendation-job-name ${OPTIMIZE_JOB_NAME} --region ${AWS_REGION}"
|
|
517
586
|
|
|
518
587
|
elif [ "${JOB_STATUS}" = "STOPPED" ]; then
|
|
519
588
|
echo "⚠️ Recommendation job was stopped before completion"
|
package/templates/do/register
CHANGED
|
@@ -22,6 +22,8 @@ JSON_OUTPUT=false
|
|
|
22
22
|
CI_MODE=false
|
|
23
23
|
CI_TABLE_NAME="${CI_TABLE_NAME:-mlcc-ci-table}"
|
|
24
24
|
CI_BUILD_STRATEGY="codebuild-submit"
|
|
25
|
+
BENCHMARK_STATUS=""
|
|
26
|
+
BENCHMARK_RUN_ID=""
|
|
25
27
|
|
|
26
28
|
while [[ $# -gt 0 ]]; do
|
|
27
29
|
case "$1" in
|
|
@@ -70,15 +72,102 @@ while [[ $# -gt 0 ]]; do
|
|
|
70
72
|
CI_BUILD_STRATEGY="${1#*=}"
|
|
71
73
|
shift
|
|
72
74
|
;;
|
|
75
|
+
--benchmark-status)
|
|
76
|
+
BENCHMARK_STATUS="$2"
|
|
77
|
+
shift 2
|
|
78
|
+
;;
|
|
79
|
+
--benchmark-status=*)
|
|
80
|
+
BENCHMARK_STATUS="${1#*=}"
|
|
81
|
+
shift
|
|
82
|
+
;;
|
|
83
|
+
--benchmark-run-id)
|
|
84
|
+
BENCHMARK_RUN_ID="$2"
|
|
85
|
+
shift 2
|
|
86
|
+
;;
|
|
87
|
+
--benchmark-run-id=*)
|
|
88
|
+
BENCHMARK_RUN_ID="${1#*=}"
|
|
89
|
+
shift
|
|
90
|
+
;;
|
|
73
91
|
*)
|
|
74
92
|
echo "⚠️ Unknown option: $1"
|
|
75
93
|
echo ""
|
|
76
94
|
echo "Usage: ./do/register [--notes \"text\"] [--status success|partial|failed] [--project] [--json] [--ci] [--ci-table <name>] [--build-strategy <strategy>]"
|
|
95
|
+
echo " ./do/register --benchmark-status <completed|failed|in-progress> --benchmark-run-id <run-id>"
|
|
77
96
|
exit 1
|
|
78
97
|
;;
|
|
79
98
|
esac
|
|
80
99
|
done
|
|
81
100
|
|
|
101
|
+
# ============================================================
|
|
102
|
+
# Handle --benchmark-status mode (Stage 2 failure isolation)
|
|
103
|
+
# This mode ONLY writes benchmark fields to DynamoDB.
|
|
104
|
+
# It uses an UpdateExpression that sets ONLY:
|
|
105
|
+
# lastBenchmarkRunId, lastBenchmarkTimestamp, lastBenchmarkStatus
|
|
106
|
+
# It does NOT touch testStatus, configJson, or any other fields.
|
|
107
|
+
# This ensures Stage 2 failure does not change the CI gate result.
|
|
108
|
+
# Requirements: 1.4, 7.3
|
|
109
|
+
# ============================================================
|
|
110
|
+
|
|
111
|
+
if [ -n "${BENCHMARK_STATUS}" ]; then
|
|
112
|
+
# Validate benchmark status
|
|
113
|
+
case "${BENCHMARK_STATUS}" in
|
|
114
|
+
completed|failed|in-progress) ;;
|
|
115
|
+
*)
|
|
116
|
+
echo "❌ Invalid benchmark status: ${BENCHMARK_STATUS}"
|
|
117
|
+
echo " Valid values: completed, failed, in-progress"
|
|
118
|
+
exit 1
|
|
119
|
+
;;
|
|
120
|
+
esac
|
|
121
|
+
|
|
122
|
+
if [ -z "${BENCHMARK_RUN_ID}" ]; then
|
|
123
|
+
echo "❌ --benchmark-run-id is required when using --benchmark-status"
|
|
124
|
+
exit 1
|
|
125
|
+
fi
|
|
126
|
+
|
|
127
|
+
# Compute configId (same hash logic used by write_ci_record below)
|
|
128
|
+
_benchmark_input="${DEPLOYMENT_CONFIG}:${MODEL_NAME:-none}:${INSTANCE_TYPE}:${AWS_REGION}:${DEPLOYMENT_TARGET}:ic${IC_COUNT:-1}:adapt${ADAPTER_COUNT:-0}"
|
|
129
|
+
if command -v sha256sum &> /dev/null; then
|
|
130
|
+
CONFIG_ID=$(echo -n "$_benchmark_input" | sha256sum | cut -c1-16)
|
|
131
|
+
else
|
|
132
|
+
CONFIG_ID=$(echo -n "$_benchmark_input" | shasum -a 256 | cut -c1-16)
|
|
133
|
+
fi
|
|
134
|
+
BENCHMARK_TIMESTAMP=$(date -u +%Y-%m-%dT%H:%M:%SZ)
|
|
135
|
+
|
|
136
|
+
echo "📊 Recording benchmark status for configId: ${CONFIG_ID}"
|
|
137
|
+
echo " Benchmark run ID: ${BENCHMARK_RUN_ID}"
|
|
138
|
+
echo " Benchmark status: ${BENCHMARK_STATUS}"
|
|
139
|
+
echo " Timestamp: ${BENCHMARK_TIMESTAMP}"
|
|
140
|
+
echo ""
|
|
141
|
+
|
|
142
|
+
# Check if CI_Table exists
|
|
143
|
+
if ! aws dynamodb describe-table --table-name "${CI_TABLE_NAME}" --region "${AWS_REGION}" &>/dev/null; then
|
|
144
|
+
echo "⚠️ CI infrastructure not provisioned. Run 'ml-container-creator bootstrap' with CI enabled."
|
|
145
|
+
echo " Skipping benchmark status write."
|
|
146
|
+
exit 0
|
|
147
|
+
fi
|
|
148
|
+
|
|
149
|
+
# UpdateExpression ONLY sets the 3 benchmark fields.
|
|
150
|
+
# This is a SET operation — it does NOT overwrite the entire record (PutItem).
|
|
151
|
+
# testStatus, configJson, and all other fields remain unchanged.
|
|
152
|
+
if aws dynamodb update-item \
|
|
153
|
+
--region "${AWS_REGION}" \
|
|
154
|
+
--table-name "${CI_TABLE_NAME}" \
|
|
155
|
+
--key "{\"configId\": {\"S\": \"${CONFIG_ID}\"}}" \
|
|
156
|
+
--update-expression "SET lastBenchmarkRunId = :rid, lastBenchmarkTimestamp = :ts, lastBenchmarkStatus = :bs" \
|
|
157
|
+
--expression-attribute-values "{
|
|
158
|
+
\":rid\": {\"S\": \"${BENCHMARK_RUN_ID}\"},
|
|
159
|
+
\":ts\": {\"S\": \"${BENCHMARK_TIMESTAMP}\"},
|
|
160
|
+
\":bs\": {\"S\": \"${BENCHMARK_STATUS}\"}
|
|
161
|
+
}" 2>/dev/null; then
|
|
162
|
+
echo "✅ Benchmark status recorded: ${BENCHMARK_STATUS}"
|
|
163
|
+
else
|
|
164
|
+
echo "❌ Failed to write benchmark status for configId: ${CONFIG_ID}"
|
|
165
|
+
exit 1
|
|
166
|
+
fi
|
|
167
|
+
|
|
168
|
+
exit 0
|
|
169
|
+
fi
|
|
170
|
+
|
|
82
171
|
# Validate status
|
|
83
172
|
case "${STATUS}" in
|
|
84
173
|
success|partial|failed) ;;
|
package/templates/do/test
CHANGED
|
@@ -214,6 +214,7 @@ else
|
|
|
214
214
|
# Resolve inference component name
|
|
215
215
|
# Precedence: do/adapters/ → do/ic/ → legacy config
|
|
216
216
|
IC_NAME=""
|
|
217
|
+
ADAPTER_MODEL_NAME=""
|
|
217
218
|
if [ -n "${IC_ARG}" ] && [ -f "${SCRIPT_DIR}/adapters/${IC_ARG}.conf" ]; then
|
|
218
219
|
# Argument matches an adapter name — use adapter IC
|
|
219
220
|
ADAPTER_IC_NAME=""
|
|
@@ -223,6 +224,7 @@ else
|
|
|
223
224
|
exit 1
|
|
224
225
|
fi
|
|
225
226
|
IC_NAME="${ADAPTER_IC_NAME}"
|
|
227
|
+
ADAPTER_MODEL_NAME="${IC_ARG}"
|
|
226
228
|
elif [ -n "${IC_ARG}" ]; then
|
|
227
229
|
# Explicit IC name provided as argument
|
|
228
230
|
IC_CONF="${SCRIPT_DIR}/ic/${IC_ARG}.conf"
|
|
@@ -258,6 +260,17 @@ else
|
|
|
258
260
|
IC_NAME="${INFERENCE_COMPONENT_NAME:-}"
|
|
259
261
|
fi
|
|
260
262
|
|
|
263
|
+
# If testing an adapter, override the model name in the payload
|
|
264
|
+
if [ -n "${ADAPTER_MODEL_NAME}" ] && [ "${FRAMEWORK}" = "transformers" ]; then
|
|
265
|
+
case "${MODEL_SERVER}" in
|
|
266
|
+
vllm|sglang)
|
|
267
|
+
TEST_PAYLOAD='{"model": "'"${ADAPTER_MODEL_NAME}"'", "messages": [{"role": "user", "content": "What is machine learning?"}], "max_tokens": 50, "temperature": 0.7}'
|
|
268
|
+
echo "${TEST_PAYLOAD}" > "${TEMP_PAYLOAD}"
|
|
269
|
+
echo " (Using adapter model name: ${ADAPTER_MODEL_NAME})"
|
|
270
|
+
;;
|
|
271
|
+
esac
|
|
272
|
+
fi
|
|
273
|
+
|
|
261
274
|
INVOKE_ARGS=(
|
|
262
275
|
--endpoint-name "${ENDPOINT_NAME}"
|
|
263
276
|
--region "${AWS_REGION}"
|