@aws/ml-container-creator 0.10.0 → 0.10.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/LICENSE-THIRD-PARTY +9304 -0
  2. package/bin/cli.js +2 -0
  3. package/config/bootstrap-e2e-stack.json +341 -0
  4. package/config/bootstrap-stack.json +40 -3
  5. package/config/parameter-schema-v2.json +5 -21
  6. package/config/tune-catalog.json +1781 -0
  7. package/infra/ci-harness/buildspec.yml +1 -0
  8. package/infra/ci-harness/lambda/path-prover/brain.ts +306 -0
  9. package/infra/ci-harness/lambda/path-prover/write-results.ts +152 -0
  10. package/infra/ci-harness/lib/ci-harness-stack.ts +837 -7
  11. package/infra/ci-harness/state-machines/path-prover.asl.json +496 -0
  12. package/package.json +51 -66
  13. package/servers/base-image-picker/index.js +121 -121
  14. package/servers/e2e-status/index.js +297 -0
  15. package/servers/e2e-status/manifest.json +14 -0
  16. package/servers/e2e-status/package.json +15 -0
  17. package/servers/endpoint-picker/LICENSE +202 -0
  18. package/servers/endpoint-picker/index.js +536 -0
  19. package/servers/endpoint-picker/manifest.json +14 -0
  20. package/servers/endpoint-picker/package.json +18 -0
  21. package/servers/hyperpod-cluster-picker/index.js +125 -125
  22. package/servers/instance-sizer/index.js +138 -138
  23. package/servers/instance-sizer/lib/instance-ranker.js +76 -76
  24. package/servers/instance-sizer/lib/model-resolver.js +61 -61
  25. package/servers/instance-sizer/lib/quota-resolver.js +113 -113
  26. package/servers/instance-sizer/lib/vram-estimator.js +31 -31
  27. package/servers/lib/bedrock-client.js +38 -38
  28. package/servers/lib/catalogs/model-servers.json +201 -3
  29. package/servers/lib/custom-validators.js +13 -13
  30. package/servers/lib/dynamic-resolver.js +4 -4
  31. package/servers/marketplace-picker/index.js +342 -0
  32. package/servers/marketplace-picker/manifest.json +14 -0
  33. package/servers/marketplace-picker/package.json +18 -0
  34. package/servers/model-picker/index.js +382 -382
  35. package/servers/region-picker/index.js +56 -56
  36. package/servers/workload-picker/LICENSE +202 -0
  37. package/servers/workload-picker/catalogs/workload-profiles.json +67 -0
  38. package/servers/workload-picker/index.js +171 -0
  39. package/servers/workload-picker/manifest.json +16 -0
  40. package/servers/workload-picker/package.json +16 -0
  41. package/src/app.js +4 -2
  42. package/src/lib/bootstrap-command-handler.js +579 -14
  43. package/src/lib/bootstrap-config.js +36 -0
  44. package/src/lib/bootstrap-profile-manager.js +48 -41
  45. package/src/lib/ci-register-helpers.js +74 -0
  46. package/src/lib/config-loader.js +3 -0
  47. package/src/lib/config-manager.js +7 -0
  48. package/src/lib/cuda-resolver.js +17 -8
  49. package/src/lib/generated/cli-options.js +315 -315
  50. package/src/lib/generated/parameter-matrix.js +661 -661
  51. package/src/lib/generated/validation-rules.js +71 -71
  52. package/src/lib/path-prover-brain.js +607 -0
  53. package/src/lib/prompts/project-prompts.js +12 -0
  54. package/src/lib/template-variable-resolver.js +25 -1
  55. package/src/lib/tune-catalog-validator.js +37 -4
  56. package/templates/Dockerfile +9 -0
  57. package/templates/code/adapter_sidecar.py +444 -0
  58. package/templates/code/serve +6 -0
  59. package/templates/code/serve.d/vllm.ejs +1 -1
  60. package/templates/do/.benchmark_writer.py +1476 -0
  61. package/templates/do/.tune_helper.py +982 -57
  62. package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
  63. package/templates/do/adapter +149 -0
  64. package/templates/do/benchmark +639 -85
  65. package/templates/do/config +108 -5
  66. package/templates/do/deploy.d/managed-inference.ejs +192 -11
  67. package/templates/do/optimize +106 -37
  68. package/templates/do/register +89 -0
  69. package/templates/do/test +13 -0
  70. package/templates/do/tune +378 -59
  71. package/templates/do/validate +44 -4
package/templates/do/optimize CHANGED
@@ -106,6 +106,30 @@ elif [ -n "${INSTANCE_POOLS:-}" ]; then
106
106
  elif [ -n "${INSTANCE_TYPE:-}" ]; then
107
107
  INSTANCE_TYPES="${INSTANCE_TYPE}"
108
108
  echo " Instances (from INSTANCE_TYPE): ${INSTANCE_TYPES}"
109
+ elif [ "${ENDPOINT_EXTERNAL:-}" = "true" ] && [ -n "${ENDPOINT_NAME:-}" ]; then
110
+ # External endpoint — query the live endpoint config for instance type
111
+ echo " Discovering instance type from external endpoint: ${ENDPOINT_NAME}"
112
+ ENDPOINT_CONFIG_NAME=$(aws sagemaker describe-endpoint \
113
+ --endpoint-name "${ENDPOINT_NAME}" \
114
+ --region "${AWS_REGION}" \
115
+ --query 'EndpointConfigName' \
116
+ --output text 2>/dev/null) || ENDPOINT_CONFIG_NAME=""
117
+
118
+ if [ -n "${ENDPOINT_CONFIG_NAME}" ]; then
119
+ INSTANCE_TYPES=$(aws sagemaker describe-endpoint-config \
120
+ --endpoint-config-name "${ENDPOINT_CONFIG_NAME}" \
121
+ --region "${AWS_REGION}" \
122
+ --query 'ProductionVariants[0].InstanceType' \
123
+ --output text 2>/dev/null) || INSTANCE_TYPES=""
124
+ fi
125
+
126
+ if [ -n "${INSTANCE_TYPES}" ] && [ "${INSTANCE_TYPES}" != "None" ]; then
127
+ echo " Instances (from endpoint): ${INSTANCE_TYPES}"
128
+ else
129
+ echo "❌ Could not discover instance type from endpoint: ${ENDPOINT_NAME}"
130
+ echo " Provide --instances flag, or set INSTANCE_TYPE in do/config."
131
+ exit 1
132
+ fi
109
133
  else
110
134
  echo "❌ No instance types available."
111
135
  echo " Provide --instances flag, or set INSTANCE_POOLS or INSTANCE_TYPE in do/config."
@@ -132,9 +156,9 @@ RESUME_EXISTING=false
132
156
 
133
157
  if [ "${FORCE}" = false ] && [ -n "${OPTIMIZE_JOB_NAME:-}" ]; then
134
158
  EXISTING_STATUS=$(aws sagemaker describe-ai-recommendation-job \
135
- --job-name "${OPTIMIZE_JOB_NAME}" \
159
+ --ai-recommendation-job-name "${OPTIMIZE_JOB_NAME}" \
136
160
  --region "${AWS_REGION}" \
137
- --query 'Status' \
161
+ --query 'AIRecommendationJobStatus' \
138
162
  --output text 2>/dev/null) || EXISTING_STATUS=""
139
163
 
140
164
  case "${EXISTING_STATUS}" in
@@ -154,7 +178,7 @@ if [ "${FORCE}" = false ] && [ -n "${OPTIMIZE_JOB_NAME:-}" ]; then
154
178
  ;;
155
179
  FAILED|STOPPED)
156
180
  FAILURE_REASON=$(aws sagemaker describe-ai-recommendation-job \
157
- --job-name "${OPTIMIZE_JOB_NAME}" \
181
+ --ai-recommendation-job-name "${OPTIMIZE_JOB_NAME}" \
158
182
  --region "${AWS_REGION}" \
159
183
  --query 'FailureReason' \
160
184
  --output text 2>/dev/null) || FAILURE_REASON="unknown"
@@ -174,44 +198,86 @@ fi
174
198
  # ── Create recommendation job ─────────────────────────────────────────────────
175
199
  if [ "${RESUME_EXISTING}" = false ]; then
176
200
  OPTIMIZE_JOB_NAME="${PROJECT_NAME}-optimize-$(date +%Y%m%d-%H%M%S)"
201
+ WORKLOAD_CONFIG_NAME="${OPTIMIZE_JOB_NAME}-workload"
177
202
 
178
203
  echo "🚀 Creating AI Recommendation Job: ${OPTIMIZE_JOB_NAME}"
179
204
 
180
- # Build instance type list as JSON array
181
- INSTANCE_TYPES_JSON="["
182
- FIRST=true
205
+ # Step 1: Create workload config
206
+ echo " Creating workload config: ${WORKLOAD_CONFIG_NAME}"
207
+
208
+ WORKLOAD_SPEC_INNER="{\"benchmark\": {\"type\": \"aiperf\"}, \"parameters\": {\"prompt_input_tokens_mean\": ${INPUT_TOKENS}, \"prompt_input_tokens_stddev\": 150, \"output_tokens_mean\": ${OUTPUT_TOKENS}, \"output_tokens_stddev\": 50, \"concurrency\": ${CONCURRENCY}, \"streaming\": true}}"
209
+ WORKLOAD_SPEC_OUTER="{\"WorkloadSpec\": {\"Inline\": $(python3 -c "import json; print(json.dumps('${WORKLOAD_SPEC_INNER}'))")}}"
210
+
211
+ if ! aws sagemaker create-ai-workload-config \
212
+ --ai-workload-config-name "${WORKLOAD_CONFIG_NAME}" \
213
+ --ai-workload-configs "${WORKLOAD_SPEC_OUTER}" \
214
+ --region "${AWS_REGION}" 2>&1 | grep -q "AIWorkloadConfigArn"; then
215
+ echo "❌ Failed to create workload config: ${WORKLOAD_CONFIG_NAME}"
216
+ echo " Check that the execution role has sagemaker:CreateAIWorkloadConfig permission."
217
+ # Show actual error
218
+ aws sagemaker create-ai-workload-config \
219
+ --ai-workload-config-name "${WORKLOAD_CONFIG_NAME}" \
220
+ --ai-workload-configs "${WORKLOAD_SPEC_OUTER}" \
221
+ --region "${AWS_REGION}" 2>&1 || true
222
+ exit 1
223
+ fi
224
+ echo " ✅ Workload config created"
225
+
226
+ # Step 2: Build compute spec (instance types, max 3)
227
+ COMPUTE_SPEC_JSON=""
183
228
  IFS=',' read -ra TYPES <<< "${INSTANCE_TYPES}"
229
+ INSTANCE_LIST=""
230
+ COUNT=0
184
231
  for itype in "${TYPES[@]}"; do
185
232
  itype=$(echo "${itype}" | xargs) # trim whitespace
186
- if [ "${FIRST}" = true ]; then
187
- INSTANCE_TYPES_JSON="${INSTANCE_TYPES_JSON}\"${itype}\""
188
- FIRST=false
233
+ if [ ${COUNT} -ge 3 ]; then
234
+ echo " ⚠️ Max 3 instance types supported — truncating"
235
+ break
236
+ fi
237
+ if [ -n "${INSTANCE_LIST}" ]; then
238
+ INSTANCE_LIST="${INSTANCE_LIST},\"${itype}\""
189
239
  else
190
- INSTANCE_TYPES_JSON="${INSTANCE_TYPES_JSON},\"${itype}\""
240
+ INSTANCE_LIST="\"${itype}\""
191
241
  fi
242
+ COUNT=$((COUNT + 1))
192
243
  done
193
- INSTANCE_TYPES_JSON="${INSTANCE_TYPES_JSON}]"
244
+ COMPUTE_SPEC_JSON="InstanceTypes=[${INSTANCE_LIST}]"
245
+
246
+ # Step 3: Map goal to performance target metric
247
+ case "${GOAL}" in
248
+ latency) PERF_METRIC="ttft-ms" ;;
249
+ throughput) PERF_METRIC="throughput" ;;
250
+ cost) PERF_METRIC="cost" ;;
251
+ esac
194
252
 
195
- # Build job input config
196
- # The model is specified as either a HuggingFace model ID or S3 path
197
- MODEL_SOURCE_JSON=""
253
+ # Step 4: Determine model source
254
+ # The recommendation API requires model artifacts as s3:// or https:// URI.
255
+ MODEL_SOURCE_ARG=""
198
256
  if [[ "${MODEL_NAME}" == s3://* ]]; then
199
- MODEL_SOURCE_JSON="{\"S3DataSource\":{\"S3Uri\":\"${MODEL_NAME}\"}}"
257
+ MODEL_SOURCE_ARG="S3={S3Uri=${MODEL_NAME}}"
200
258
  else
201
- MODEL_SOURCE_JSON="{\"ModelName\":\"${MODEL_NAME}\"}"
259
+ # HuggingFace model — use the HTTPS URL for the model on HuggingFace Hub
260
+ MODEL_SOURCE_ARG="S3={S3Uri=https://huggingface.co/${MODEL_NAME}}"
202
261
  fi
203
262
 
204
- # Build workload config
205
- WORKLOAD_JSON="{\"Concurrency\":${CONCURRENCY},\"InputTokens\":${INPUT_TOKENS},\"OutputTokens\":${OUTPUT_TOKENS}}"
206
-
207
- # Build the full job specification
208
- JOB_INPUT="{\"ModelSource\":${MODEL_SOURCE_JSON},\"Workload\":${WORKLOAD_JSON},\"InstanceTypes\":${INSTANCE_TYPES_JSON},\"OptimizationGoal\":\"${GOAL}\"}"
209
-
210
- if ! aws sagemaker create-ai-recommendation-job \
211
- --job-name "${OPTIMIZE_JOB_NAME}" \
212
- --role-arn "${ROLE_ARN}" \
213
- --input-config "${JOB_INPUT}" \
214
- --region "${AWS_REGION}"; then
263
+ # Step 5: Create the recommendation job
264
+ S3_OUTPUT="s3://${TUNE_S3_BUCKET:-mlcc-tune-$(aws sts get-caller-identity --query Account --output text 2>/dev/null)-${AWS_REGION}}/${PROJECT_NAME}/optimize/"
265
+
266
+ RECOMMEND_CMD=(
267
+ aws sagemaker create-ai-recommendation-job
268
+ --ai-recommendation-job-name "${OPTIMIZE_JOB_NAME}"
269
+ --model-source "${MODEL_SOURCE_ARG}"
270
+ --output-config "S3OutputLocation=${S3_OUTPUT}"
271
+ --ai-workload-config-identifier "${WORKLOAD_CONFIG_NAME}"
272
+ --performance-target "Constraints=[{Metric=${PERF_METRIC}}]"
273
+ --role-arn "${ROLE_ARN}"
274
+ --compute-spec "${COMPUTE_SPEC_JSON}"
275
+ --inference-specification "Framework=VLLM"
276
+ --region "${AWS_REGION}"
277
+ )
278
+
279
+ if ! "${RECOMMEND_CMD[@]}" 2>&1; then
280
+ echo ""
215
281
  echo "❌ Failed to create AI Recommendation Job"
216
282
  echo " Check that:"
217
283
  echo " • The execution role has sagemaker:CreateAIRecommendationJob permission"
@@ -232,7 +298,7 @@ fi
232
298
  POLL_INTERVAL=30
233
299
  MAX_POLL_ATTEMPTS=120 # 60 minutes max (120 * 30s)
234
300
 
235
- if [ "${JOB_STATUS:-}" != "COMPLETED" ] && [ "${JOB_STATUS:-}" != "FAILED" ] && [ "${JOB_STATUS:-}" != "STOPPED" ]; then
301
+ if [ "${JOB_STATUS:-}" != "COMPLETED" ] && [ "${JOB_STATUS:-}" != "FAILED" ] && [ "${JOB_STATUS:-}" != "STOPPED" ] && [ "${JOB_STATUS:-}" != "Completed" ] && [ "${JOB_STATUS:-}" != "Failed" ] && [ "${JOB_STATUS:-}" != "Stopped" ]; then
236
302
 
237
303
  echo "⏳ Waiting for recommendation job to complete..."
238
304
  echo " Polling every ${POLL_INTERVAL}s (max ${MAX_POLL_ATTEMPTS} attempts = 60 min)"
@@ -243,27 +309,30 @@ JOB_STATUS=""
243
309
 
244
310
  while [ ${POLL_COUNT} -lt ${MAX_POLL_ATTEMPTS} ]; do
245
311
  JOB_STATUS=$(aws sagemaker describe-ai-recommendation-job \
246
- --job-name "${OPTIMIZE_JOB_NAME}" \
312
+ --ai-recommendation-job-name "${OPTIMIZE_JOB_NAME}" \
247
313
  --region "${AWS_REGION}" \
248
- --query 'Status' \
314
+ --query 'AIRecommendationJobStatus' \
249
315
  --output text 2>/dev/null) || {
250
316
  echo "⚠️ Failed to describe recommendation job (credentials may have expired)"
251
317
  echo " Re-run to check status:"
252
- echo " aws sagemaker describe-ai-recommendation-job --job-name ${OPTIMIZE_JOB_NAME} --region ${AWS_REGION}"
318
+ echo " aws sagemaker describe-ai-recommendation-job --ai-recommendation-job-name ${OPTIMIZE_JOB_NAME} --region ${AWS_REGION}"
253
319
  exit 1
254
320
  }
255
321
 
256
322
  case "${JOB_STATUS}" in
257
- COMPLETED)
323
+ COMPLETED|Completed)
258
324
  echo "✅ Recommendation job completed!"
325
+ JOB_STATUS="COMPLETED"
259
326
  break
260
327
  ;;
261
- FAILED)
328
+ FAILED|Failed)
262
329
  echo "❌ Recommendation job failed"
330
+ JOB_STATUS="FAILED"
263
331
  break
264
332
  ;;
265
- STOPPED)
333
+ STOPPED|Stopped)
266
334
  echo "⚠️ Recommendation job was stopped"
335
+ JOB_STATUS="STOPPED"
267
336
  break
268
337
  ;;
269
338
  *)
@@ -292,7 +361,7 @@ if [ "${JOB_STATUS}" = "COMPLETED" ]; then
292
361
 
293
362
  # Get the full job description with results
294
363
  JOB_DESCRIPTION=$(aws sagemaker describe-ai-recommendation-job \
295
- --job-name "${OPTIMIZE_JOB_NAME}" \
364
+ --ai-recommendation-job-name "${OPTIMIZE_JOB_NAME}" \
296
365
  --region "${AWS_REGION}" \
297
366
  --output json 2>/dev/null) || {
298
367
  echo "❌ Failed to fetch recommendation results"
@@ -503,7 +572,7 @@ except:
503
572
 
504
573
  if [ "${FAILURE_REASON}" = "unknown" ]; then
505
574
  FAILURE_REASON=$(aws sagemaker describe-ai-recommendation-job \
506
- --job-name "${OPTIMIZE_JOB_NAME}" \
575
+ --ai-recommendation-job-name "${OPTIMIZE_JOB_NAME}" \
507
576
  --region "${AWS_REGION}" \
508
577
  --query 'FailureReason' \
509
578
  --output text 2>/dev/null) || FAILURE_REASON="unknown"
@@ -513,7 +582,7 @@ except:
513
582
  echo " Reason: ${FAILURE_REASON}"
514
583
  echo ""
515
584
  echo " Debug:"
516
- echo " aws sagemaker describe-ai-recommendation-job --job-name ${OPTIMIZE_JOB_NAME} --region ${AWS_REGION}"
585
+ echo " aws sagemaker describe-ai-recommendation-job --ai-recommendation-job-name ${OPTIMIZE_JOB_NAME} --region ${AWS_REGION}"
517
586
 
518
587
  elif [ "${JOB_STATUS}" = "STOPPED" ]; then
519
588
  echo "⚠️ Recommendation job was stopped before completion"
package/templates/do/register CHANGED
@@ -22,6 +22,8 @@ JSON_OUTPUT=false
22
22
  CI_MODE=false
23
23
  CI_TABLE_NAME="${CI_TABLE_NAME:-mlcc-ci-table}"
24
24
  CI_BUILD_STRATEGY="codebuild-submit"
25
+ BENCHMARK_STATUS=""
26
+ BENCHMARK_RUN_ID=""
25
27
 
26
28
  while [[ $# -gt 0 ]]; do
27
29
  case "$1" in
@@ -70,15 +72,102 @@ while [[ $# -gt 0 ]]; do
70
72
  CI_BUILD_STRATEGY="${1#*=}"
71
73
  shift
72
74
  ;;
75
+ --benchmark-status)
76
+ BENCHMARK_STATUS="$2"
77
+ shift 2
78
+ ;;
79
+ --benchmark-status=*)
80
+ BENCHMARK_STATUS="${1#*=}"
81
+ shift
82
+ ;;
83
+ --benchmark-run-id)
84
+ BENCHMARK_RUN_ID="$2"
85
+ shift 2
86
+ ;;
87
+ --benchmark-run-id=*)
88
+ BENCHMARK_RUN_ID="${1#*=}"
89
+ shift
90
+ ;;
73
91
  *)
74
92
  echo "⚠️ Unknown option: $1"
75
93
  echo ""
76
94
  echo "Usage: ./do/register [--notes \"text\"] [--status success|partial|failed] [--project] [--json] [--ci] [--ci-table <name>] [--build-strategy <strategy>]"
95
+ echo " ./do/register --benchmark-status <completed|failed|in-progress> --benchmark-run-id <run-id>"
77
96
  exit 1
78
97
  ;;
79
98
  esac
80
99
  done
81
100
 
101
+ # ============================================================
102
+ # Handle --benchmark-status mode (Stage 2 failure isolation)
103
+ # This mode ONLY writes benchmark fields to DynamoDB.
104
+ # It uses an UpdateExpression that sets ONLY:
105
+ # lastBenchmarkRunId, lastBenchmarkTimestamp, lastBenchmarkStatus
106
+ # It does NOT touch testStatus, configJson, or any other fields.
107
+ # This ensures Stage 2 failure does not change the CI gate result.
108
+ # Requirements: 1.4, 7.3
109
+ # ============================================================
110
+
111
+ if [ -n "${BENCHMARK_STATUS}" ]; then
112
+ # Validate benchmark status
113
+ case "${BENCHMARK_STATUS}" in
114
+ completed|failed|in-progress) ;;
115
+ *)
116
+ echo "❌ Invalid benchmark status: ${BENCHMARK_STATUS}"
117
+ echo " Valid values: completed, failed, in-progress"
118
+ exit 1
119
+ ;;
120
+ esac
121
+
122
+ if [ -z "${BENCHMARK_RUN_ID}" ]; then
123
+ echo "❌ --benchmark-run-id is required when using --benchmark-status"
124
+ exit 1
125
+ fi
126
+
127
+ # Compute configId (same hash logic used by write_ci_record below)
128
+ _benchmark_input="${DEPLOYMENT_CONFIG}:${MODEL_NAME:-none}:${INSTANCE_TYPE}:${AWS_REGION}:${DEPLOYMENT_TARGET}:ic${IC_COUNT:-1}:adapt${ADAPTER_COUNT:-0}"
129
+ if command -v sha256sum &> /dev/null; then
130
+ CONFIG_ID=$(echo -n "$_benchmark_input" | sha256sum | cut -c1-16)
131
+ else
132
+ CONFIG_ID=$(echo -n "$_benchmark_input" | shasum -a 256 | cut -c1-16)
133
+ fi
134
+ BENCHMARK_TIMESTAMP=$(date -u +%Y-%m-%dT%H:%M:%SZ)
135
+
136
+ echo "📊 Recording benchmark status for configId: ${CONFIG_ID}"
137
+ echo " Benchmark run ID: ${BENCHMARK_RUN_ID}"
138
+ echo " Benchmark status: ${BENCHMARK_STATUS}"
139
+ echo " Timestamp: ${BENCHMARK_TIMESTAMP}"
140
+ echo ""
141
+
142
+ # Check if CI_Table exists
143
+ if ! aws dynamodb describe-table --table-name "${CI_TABLE_NAME}" --region "${AWS_REGION}" &>/dev/null; then
144
+ echo "⚠️ CI infrastructure not provisioned. Run 'ml-container-creator bootstrap' with CI enabled."
145
+ echo " Skipping benchmark status write."
146
+ exit 0
147
+ fi
148
+
149
+ # UpdateExpression ONLY sets the 3 benchmark fields.
150
+ # This is a SET operation — it does NOT overwrite the entire record (PutItem).
151
+ # testStatus, configJson, and all other fields remain unchanged.
152
+ if aws dynamodb update-item \
153
+ --region "${AWS_REGION}" \
154
+ --table-name "${CI_TABLE_NAME}" \
155
+ --key "{\"configId\": {\"S\": \"${CONFIG_ID}\"}}" \
156
+ --update-expression "SET lastBenchmarkRunId = :rid, lastBenchmarkTimestamp = :ts, lastBenchmarkStatus = :bs" \
157
+ --expression-attribute-values "{
158
+ \":rid\": {\"S\": \"${BENCHMARK_RUN_ID}\"},
159
+ \":ts\": {\"S\": \"${BENCHMARK_TIMESTAMP}\"},
160
+ \":bs\": {\"S\": \"${BENCHMARK_STATUS}\"}
161
+ }" 2>/dev/null; then
162
+ echo "✅ Benchmark status recorded: ${BENCHMARK_STATUS}"
163
+ else
164
+ echo "❌ Failed to write benchmark status for configId: ${CONFIG_ID}"
165
+ exit 1
166
+ fi
167
+
168
+ exit 0
169
+ fi
170
+
82
171
  # Validate status
83
172
  case "${STATUS}" in
84
173
  success|partial|failed) ;;
package/templates/do/test CHANGED
@@ -214,6 +214,7 @@ else
214
214
  # Resolve inference component name
215
215
  # Precedence: do/adapters/ → do/ic/ → legacy config
216
216
  IC_NAME=""
217
+ ADAPTER_MODEL_NAME=""
217
218
  if [ -n "${IC_ARG}" ] && [ -f "${SCRIPT_DIR}/adapters/${IC_ARG}.conf" ]; then
218
219
  # Argument matches an adapter name — use adapter IC
219
220
  ADAPTER_IC_NAME=""
@@ -223,6 +224,7 @@ else
223
224
  exit 1
224
225
  fi
225
226
  IC_NAME="${ADAPTER_IC_NAME}"
227
+ ADAPTER_MODEL_NAME="${IC_ARG}"
226
228
  elif [ -n "${IC_ARG}" ]; then
227
229
  # Explicit IC name provided as argument
228
230
  IC_CONF="${SCRIPT_DIR}/ic/${IC_ARG}.conf"
@@ -258,6 +260,17 @@ else
258
260
  IC_NAME="${INFERENCE_COMPONENT_NAME:-}"
259
261
  fi
260
262
 
263
+ # If testing an adapter, override the model name in the payload
264
+ if [ -n "${ADAPTER_MODEL_NAME}" ] && [ "${FRAMEWORK}" = "transformers" ]; then
265
+ case "${MODEL_SERVER}" in
266
+ vllm|sglang)
267
+ TEST_PAYLOAD='{"model": "'"${ADAPTER_MODEL_NAME}"'", "messages": [{"role": "user", "content": "What is machine learning?"}], "max_tokens": 50, "temperature": 0.7}'
268
+ echo "${TEST_PAYLOAD}" > "${TEMP_PAYLOAD}"
269
+ echo " (Using adapter model name: ${ADAPTER_MODEL_NAME})"
270
+ ;;
271
+ esac
272
+ fi
273
+
261
274
  INVOKE_ARGS=(
262
275
  --endpoint-name "${ENDPOINT_NAME}"
263
276
  --region "${AWS_REGION}"