@aws/ml-container-creator 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/bin/cli.js +5 -2
  2. package/config/bootstrap-stack.json +86 -7
  3. package/config/defaults.json +1 -1
  4. package/infra/ci-harness/buildspec.yml +60 -0
  5. package/package.json +3 -1
  6. package/servers/README.md +41 -1
  7. package/servers/instance-sizer/index.js +42 -2
  8. package/servers/instance-sizer/lib/instance-ranker.js +114 -10
  9. package/servers/instance-sizer/lib/quota-resolver.js +368 -0
  10. package/servers/instance-sizer/package.json +2 -0
  11. package/servers/lib/catalogs/instances.json +527 -12
  12. package/servers/lib/catalogs/model-servers.json +15 -15
  13. package/servers/lib/catalogs/model-sizes.json +27 -0
  14. package/servers/lib/catalogs/models.json +71 -0
  15. package/servers/lib/schemas/image-catalog.schema.json +9 -1
  16. package/src/app.js +109 -3
  17. package/src/lib/bootstrap-command-handler.js +96 -3
  18. package/src/lib/cli-handler.js +2 -2
  19. package/src/lib/config-manager.js +117 -1
  20. package/src/lib/deployment-entry-schema.js +16 -0
  21. package/src/lib/prompt-runner.js +270 -12
  22. package/src/lib/prompts.js +288 -6
  23. package/src/lib/registry-command-handler.js +12 -0
  24. package/src/lib/schema-sync.js +31 -0
  25. package/src/lib/template-manager.js +49 -1
  26. package/src/lib/validate-runner.js +125 -2
  27. package/templates/Dockerfile +22 -2
  28. package/templates/code/cuda_compat.sh +22 -0
  29. package/templates/code/serve +3 -0
  30. package/templates/code/serving.properties +14 -0
  31. package/templates/code/start_server.sh +3 -0
  32. package/templates/diffusors/Dockerfile +2 -1
  33. package/templates/diffusors/serve +3 -0
  34. package/templates/do/README.md +33 -0
  35. package/templates/do/adapter +1214 -0
  36. package/templates/do/adapters/.gitkeep +2 -0
  37. package/templates/do/add-ic +130 -0
  38. package/templates/do/benchmark +718 -0
  39. package/templates/do/clean +593 -17
  40. package/templates/do/config +49 -4
  41. package/templates/do/deploy +513 -362
  42. package/templates/do/ic/default.conf +32 -0
  43. package/templates/do/lib/endpoint-config.sh +216 -0
  44. package/templates/do/lib/inference-component.sh +167 -0
  45. package/templates/do/lib/secrets.sh +44 -0
  46. package/templates/do/lib/wait.sh +131 -0
  47. package/templates/do/logs +107 -27
  48. package/templates/do/optimize +528 -0
  49. package/templates/do/register +119 -2
  50. package/templates/do/status +337 -0
  51. package/templates/do/test +80 -28
  52. package/templates/triton/Dockerfile +5 -0
@@ -89,6 +89,103 @@ case "${STATUS}" in
89
89
  ;;
90
90
  esac
91
91
 
92
+ # ============================================================
93
+ # Build IC list from do/ic/ directory (multi-IC support)
94
+ # ============================================================
95
+
96
+ <% if (deploymentTarget === 'realtime-inference') { %>
97
+ IC_LIST_JSON="[]"
98
+ if [ -d "${SCRIPT_DIR}/ic" ]; then
99
+ # Build IC list from all conf files (alphabetical order)
100
+ IC_ENTRIES=""
101
+ IC_COUNT=0
102
+ for conf in "${SCRIPT_DIR}"/ic/*.conf; do
103
+ [ -f "${conf}" ] || continue
104
+
105
+ # Source the IC config to get its variables
106
+ (
107
+ # Subshell to avoid polluting current environment
108
+ source "${conf}" 2>/dev/null
109
+ echo "${IC_DEPLOYED_NAME:-}|${IC_IMAGE_TAG:-}|${IC_GPU_COUNT:-1}|${IC_COPY_COUNT:-1}"
110
+ ) > /tmp/ic_entry_$$ 2>/dev/null
111
+
112
+ IC_ENTRY=$(cat /tmp/ic_entry_$$ 2>/dev/null || echo "|||")
113
+ rm -f /tmp/ic_entry_$$
114
+
115
+ IC_BASENAME=$(basename "${conf}" .conf)
116
+ IC_ENTRY_IMAGE=$(echo "${IC_ENTRY}" | cut -d'|' -f2)
117
+ IC_ENTRY_GPU=$(echo "${IC_ENTRY}" | cut -d'|' -f3)
118
+ IC_ENTRY_COPY=$(echo "${IC_ENTRY}" | cut -d'|' -f4)
119
+
120
+ if [ -n "${IC_ENTRIES}" ]; then
121
+ IC_ENTRIES="${IC_ENTRIES},"
122
+ fi
123
+ IC_ENTRIES="${IC_ENTRIES}{\"name\":\"${IC_BASENAME}\",\"image\":\"${IC_ENTRY_IMAGE}\",\"gpuCount\":${IC_ENTRY_GPU:-1},\"copyCount\":${IC_ENTRY_COPY:-1}}"
124
+ IC_COUNT=$((IC_COUNT + 1))
125
+ done
126
+
127
+ if [ "${CI_MODE}" = true ] && [ ${IC_COUNT} -gt 1 ]; then
128
+ # CI mode: only include the first IC (alphabetically) to keep CI costs down
129
+ FIRST_CONF=$(ls "${SCRIPT_DIR}"/ic/*.conf 2>/dev/null | head -1)
130
+ if [ -n "${FIRST_CONF}" ]; then
131
+ (
132
+ source "${FIRST_CONF}" 2>/dev/null
133
+ echo "${IC_DEPLOYED_NAME:-}|${IC_IMAGE_TAG:-}|${IC_GPU_COUNT:-1}|${IC_COPY_COUNT:-1}"
134
+ ) > /tmp/ic_first_$$ 2>/dev/null
135
+
136
+ FIRST_ENTRY=$(cat /tmp/ic_first_$$ 2>/dev/null || echo "|||")
137
+ rm -f /tmp/ic_first_$$
138
+
139
+ FIRST_BASENAME=$(basename "${FIRST_CONF}" .conf)
140
+ FIRST_IMAGE=$(echo "${FIRST_ENTRY}" | cut -d'|' -f2)
141
+ FIRST_GPU=$(echo "${FIRST_ENTRY}" | cut -d'|' -f3)
142
+ FIRST_COPY=$(echo "${FIRST_ENTRY}" | cut -d'|' -f4)
143
+
144
+ IC_LIST_JSON="[{\"name\":\"${FIRST_BASENAME}\",\"image\":\"${FIRST_IMAGE}\",\"gpuCount\":${FIRST_GPU:-1},\"copyCount\":${FIRST_COPY:-1}}]"
145
+ fi
146
+ else
147
+ IC_LIST_JSON="[${IC_ENTRIES}]"
148
+ fi
149
+ else
150
+ # Legacy: single IC from do/config
151
+ IC_LIST_JSON="[{\"name\":\"default\",\"image\":\"${PROJECT_NAME}-latest\",\"gpuCount\":${IC_GPU_COUNT:-1},\"copyCount\":${IC_COPY_COUNT:-1}}]"
152
+ fi
153
+
154
+ # Append adapter entries from do/adapters/*.conf
155
+ ADAPTER_COUNT=0
156
+ if [ -d "${SCRIPT_DIR}/adapters" ]; then
157
+ ADAPTER_ENTRIES=""
158
+ for conf in "${SCRIPT_DIR}"/adapters/*.conf; do
159
+ [ -f "${conf}" ] || continue
160
+ [[ "$(basename "${conf}")" == ".gitkeep" ]] && continue
161
+
162
+ ADAPTER_NAME_VAL=""
163
+ ADAPTER_WEIGHTS_VAL=""
164
+ ADAPTER_IC_VAL=""
165
+ eval "$(grep '^export ADAPTER_NAME=' "${conf}" 2>/dev/null)"
166
+ eval "$(grep '^export ADAPTER_WEIGHTS_URI=' "${conf}" 2>/dev/null)"
167
+ eval "$(grep '^export ADAPTER_IC_NAME=' "${conf}" 2>/dev/null)"
168
+ ADAPTER_NAME_VAL="${ADAPTER_NAME:-$(basename "${conf}" .conf)}"
169
+ ADAPTER_WEIGHTS_VAL="${ADAPTER_WEIGHTS_URI:-}"
170
+ ADAPTER_IC_VAL="${ADAPTER_IC_NAME:-}"
171
+
172
+ if [ -n "${ADAPTER_ENTRIES}" ]; then
173
+ ADAPTER_ENTRIES="${ADAPTER_ENTRIES},"
174
+ fi
175
+ ADAPTER_ENTRIES="${ADAPTER_ENTRIES}{\"name\":\"${ADAPTER_NAME_VAL}\",\"isAdapter\":true,\"baseIcName\":\"${ADAPTER_IC_VAL}\",\"artifactUrl\":\"${ADAPTER_WEIGHTS_VAL}\",\"gpuCount\":0,\"copyCount\":1}"
176
+ ADAPTER_COUNT=$((ADAPTER_COUNT + 1))
177
+ unset ADAPTER_NAME ADAPTER_WEIGHTS_URI ADAPTER_IC_NAME
178
+ done
179
+
180
+ if [ -n "${ADAPTER_ENTRIES}" ] && [ "${IC_LIST_JSON}" != "[]" ]; then
181
+ # Append adapters to existing IC list
182
+ IC_LIST_JSON="${IC_LIST_JSON%]},${ADAPTER_ENTRIES}]"
183
+ elif [ -n "${ADAPTER_ENTRIES}" ]; then
184
+ IC_LIST_JSON="[${ADAPTER_ENTRIES}]"
185
+ fi
186
+ fi
187
+ <% } %>
188
+
92
189
  # ============================================================
93
190
  # Derive architecture and backend from DEPLOYMENT_CONFIG
94
191
  # ============================================================
@@ -293,7 +390,7 @@ echo ""
293
390
  # ============================================================
294
391
 
295
392
  compute_config_id() {
296
- local input="${DEPLOYMENT_CONFIG}:${MODEL_NAME:-none}:${INSTANCE_TYPE}:${AWS_REGION}:${DEPLOYMENT_TARGET}"
393
+ local input="${DEPLOYMENT_CONFIG}:${MODEL_NAME:-none}:${INSTANCE_TYPE}:${AWS_REGION}:${DEPLOYMENT_TARGET}:ic${IC_COUNT:-1}:adapt${ADAPTER_COUNT:-0}"
297
394
  # Use sha256sum (Linux) with fallback to shasum (macOS)
298
395
  if command -v sha256sum &> /dev/null; then
299
396
  echo -n "$input" | sha256sum | cut -c1-16
@@ -373,6 +470,9 @@ write_ci_record() {
373
470
  "modelWeight": ${IC_MODEL_WEIGHT}
374
471
  <% } %>
375
472
  },
473
+ <% } %>
474
+ <% if (deploymentTarget === 'realtime-inference') { %>
475
+ "icList": ${IC_LIST_JSON},
376
476
  <% } %>
377
477
  "parameters": ${PARAMETERS}
378
478
  }
@@ -393,6 +493,7 @@ CJEOF
393
493
 
394
494
  # Try put-item with condition (new record)
395
495
  if aws dynamodb put-item \
496
+ --region "${AWS_REGION}" \
396
497
  --table-name "${CI_TABLE_NAME}" \
397
498
  --item "{
398
499
  \"configId\": {\"S\": \"${config_id}\"},
@@ -412,6 +513,7 @@ CJEOF
412
513
  else
413
514
  # Record already exists — update it (reset testStatus, update configJson, preserve createdAt)
414
515
  if aws dynamodb update-item \
516
+ --region "${AWS_REGION}" \
415
517
  --table-name "${CI_TABLE_NAME}" \
416
518
  --key "{\"configId\": {\"S\": \"${config_id}\"}}" \
417
519
  --update-expression "SET configJson = :cj, testStatus = :ts, deploymentConfig = :dc, baseImage = :bi, baseImageVersion = :bv, buildStrategy = :bs, projectName = :pn, schemaVersion = :sv" \
@@ -486,6 +588,9 @@ if [ "${JSON_OUTPUT}" = true ] || [ "${CI_MODE}" = true ]; then
486
588
  "modelWeight": ${IC_MODEL_WEIGHT}
487
589
  <% } %>
488
590
  },
591
+ <% } %>
592
+ <% if (deploymentTarget === 'realtime-inference') { %>
593
+ "icList": ${IC_LIST_JSON},
489
594
  <% } %>
490
595
  "parameters": ${PARAMETERS}
491
596
  }
@@ -496,6 +601,11 @@ DJEOF
496
601
  echo "${DEPLOYMENT_JSON}" | python3 -c "import sys,json; print(json.dumps(json.load(sys.stdin), indent=2))" 2>/dev/null || echo "${DEPLOYMENT_JSON}"
497
602
 
498
603
  if [ "${CI_MODE}" = true ]; then
604
+ # Strip capacity reservation ARN for CI — force on-demand deployment
605
+ # CI projects must never use reserved capacity (reservations are account-specific
606
+ # and time-bound; CI replay should always target on-demand instances)
607
+ unset CAPACITY_RESERVATION_ARN 2>/dev/null || true
608
+
499
609
  echo ""
500
610
  echo "⚠️ CI Integration is experimental and currently only tested for"
501
611
  echo " SageMaker Real-Time Inference endpoints."
@@ -507,7 +617,7 @@ DJEOF
507
617
  echo "🔑 configId: ${CONFIG_ID}"
508
618
 
509
619
  # Check if CI_Table exists before writing
510
- if ! aws dynamodb describe-table --table-name "${CI_TABLE_NAME}" &>/dev/null; then
620
+ if ! aws dynamodb describe-table --table-name "${CI_TABLE_NAME}" --region "${AWS_REGION}" &>/dev/null; then
511
621
  echo ""
512
622
  echo "⚠️ CI infrastructure not provisioned. Run 'ml-container-creator bootstrap' with CI enabled."
513
623
  echo " Skipping CI table write."
@@ -567,6 +677,13 @@ fi
567
677
  # Pass parameters as JSON string
568
678
  CMD_ARGS+=("--parameters" "${PARAMETERS}")
569
679
 
680
+ # Pass IC list as JSON string
681
+ <% if (deploymentTarget === 'realtime-inference') { %>
682
+ if [ "${IC_LIST_JSON}" != "[]" ]; then
683
+ CMD_ARGS+=("--ic-list" "${IC_LIST_JSON}")
684
+ fi
685
+ <% } %>
686
+
570
687
  # Pass generator version from package.json if available
571
688
  GENERATOR_VERSION=""
572
689
  if command -v node &> /dev/null; then
@@ -0,0 +1,337 @@
1
+ #!/bin/bash
2
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ set -e
6
+ set -u
7
+ set -o pipefail
8
+
9
+ # Source configuration
10
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
11
+ source "${SCRIPT_DIR}/config"
12
+
13
+ # ============================================================
14
+ # SageMaker Real-Time Inference Status
15
+ # ============================================================
16
+
17
+ # Validate AWS credentials
18
+ if ! aws sts get-caller-identity &> /dev/null; then
19
+ echo "❌ AWS credentials not configured"
20
+ echo " Run: aws configure"
21
+ exit 4
22
+ fi
23
+
24
+ # Check that we have an endpoint to query
25
+ if [ -z "${ENDPOINT_NAME:-}" ]; then
26
+ echo "❌ No endpoint configured"
27
+ echo " Run ./do/deploy first to create an endpoint."
28
+ exit 1
29
+ fi
30
+
31
+ # ============================================================
32
+ # Describe Endpoint
33
+ # ============================================================
34
+ ENDPOINT_JSON=$(aws sagemaker describe-endpoint \
35
+ --endpoint-name "${ENDPOINT_NAME}" \
36
+ --region "${AWS_REGION}" 2>/dev/null) || {
37
+ echo "❌ Endpoint not found: ${ENDPOINT_NAME}"
38
+ echo " The endpoint may have been deleted. Run ./do/deploy to create a new one."
39
+ exit 1
40
+ }
41
+
42
+ EP_STATUS=$(echo "${ENDPOINT_JSON}" | grep -o '"EndpointStatus":"[^"]*"' | head -1 | cut -d'"' -f4)
43
+ EP_INSTANCE_TYPE=$(echo "${ENDPOINT_JSON}" | grep -o '"InstanceType":"[^"]*"' | head -1 | cut -d'"' -f4)
44
+ EP_INSTANCE_COUNT=$(echo "${ENDPOINT_JSON}" | grep -o '"CurrentInstanceCount":[0-9]*' | head -1 | cut -d':' -f2)
45
+
46
+ # Fallback for instance count if not available
47
+ if [ -z "${EP_INSTANCE_COUNT}" ]; then
48
+ EP_INSTANCE_COUNT=$(echo "${ENDPOINT_JSON}" | grep -o '"InitialInstanceCount":[0-9]*' | head -1 | cut -d':' -f2)
49
+ fi
50
+ EP_INSTANCE_COUNT="${EP_INSTANCE_COUNT:-1}"
51
+
52
+ # Use INSTANCE_TYPE from config as fallback if not in describe response
53
+ EP_INSTANCE_TYPE="${EP_INSTANCE_TYPE:-${INSTANCE_TYPE:-unknown}}"
54
+
55
+ # GPU count lookup for the instance type
56
#######################################
# Map a SageMaker ML instance type to the number of GPUs it provides.
# Arguments: $1 - instance type (e.g. "ml.g5.12xlarge")
# Outputs:   GPU count to stdout; empty string when the type is unknown
#            (callers treat empty as "GPU count unavailable")
# Returns:   0 always
#######################################
_get_instance_gpus() {
  local instance_type="$1"
  case "${instance_type}" in
    # Single-GPU sizes
    ml.g4dn.xlarge | \
    ml.g5.xlarge | ml.g5.2xlarge | ml.g5.4xlarge | ml.g5.8xlarge | \
    ml.g6.xlarge | \
    ml.g6e.xlarge | ml.g6e.2xlarge | ml.g6e.4xlarge | ml.g6e.8xlarge | \
    ml.g7e.xlarge | ml.g7e.2xlarge | ml.g7e.4xlarge | ml.g7e.8xlarge | \
    ml.p3.2xlarge)
      echo 1
      ;;
    # Four-GPU sizes
    ml.g4dn.12xlarge | ml.g5.12xlarge | ml.g6.12xlarge | \
    ml.g6e.12xlarge | ml.g7e.12xlarge | ml.p3.8xlarge)
      echo 4
      ;;
    # Eight-GPU sizes
    ml.g5.48xlarge | ml.g6.48xlarge | ml.g6e.48xlarge | ml.g7e.48xlarge | \
    ml.p3.16xlarge | ml.p4d.24xlarge | ml.p4de.24xlarge | ml.p5.48xlarge)
      echo 8
      ;;
    # Unknown type: emit empty so the caller can skip GPU math
    *)
      echo ""
      ;;
  esac
}
91
+
92
+ INSTANCE_GPUS=$(_get_instance_gpus "${EP_INSTANCE_TYPE}")
93
+ TOTAL_GPUS=""
94
+ if [ -n "${INSTANCE_GPUS}" ]; then
95
+ TOTAL_GPUS=$(( INSTANCE_GPUS * EP_INSTANCE_COUNT ))
96
+ fi
97
+
98
+ # ============================================================
99
+ # Detect Instance Pools
100
+ # ============================================================
101
+ HAS_INSTANCE_POOLS=false
102
+ if echo "${ENDPOINT_JSON}" | grep -q '"InstancePools"'; then
103
+ HAS_INSTANCE_POOLS=true
104
+ fi
105
+
106
+ # ============================================================
107
+ # Print Endpoint Status
108
+ # ============================================================
109
+ echo ""
110
+ if [ "${ENDPOINT_EXTERNAL:-false}" = "true" ]; then
111
+ echo "Endpoint: ${ENDPOINT_NAME} (external) [${EP_STATUS}]"
112
+ else
113
+ echo "Endpoint: ${ENDPOINT_NAME} [${EP_STATUS}]"
114
+ fi
115
+
116
+ if [ "${HAS_INSTANCE_POOLS}" = "true" ]; then
117
+ # Instance pools path: show per-pool information
118
+ echo "Instance Pools:"
119
+
120
+ # Extract pool entries from the DescribeEndpoint response
121
+ # Each pool has: InstanceType, Priority, and CurrentInstanceCount (from the running endpoint)
122
+ # Parse using grep/sed — pools are in ProductionVariants[0].InstancePools array
123
+ pool_entries=$(echo "${ENDPOINT_JSON}" | grep -oE '"InstancePools"\s*:\s*\[[^]]*\]' | head -1 | sed 's/"InstancePools"\s*:\s*//')
124
+
125
+ if [ -n "${pool_entries}" ]; then
126
+ # Extract individual pool objects
127
+ # Each pool: {"InstanceType":"ml.xxx","Priority":N} — may also have CurrentInstanceCount
128
+ pool_types=$(echo "${pool_entries}" | grep -oE '"InstanceType"\s*:\s*"[^"]+"' | sed 's/"InstanceType"\s*:\s*"//;s/"$//')
129
+ pool_priorities=$(echo "${pool_entries}" | grep -oE '"Priority"\s*:\s*[0-9]+' | sed 's/"Priority"\s*:\s*//')
130
+
131
+ # CurrentInstanceCount may appear per-pool in the response
132
+ # If not per-pool, fall back to the endpoint-level CurrentInstanceCount
133
+ pool_instance_counts=$(echo "${pool_entries}" | grep -oE '"CurrentInstanceCount"\s*:\s*[0-9]+' | sed 's/"CurrentInstanceCount"\s*:\s*//')
134
+
135
+ # Convert to arrays
136
+ IFS=$'\n' read -r -d '' -a types_arr <<< "${pool_types}" || true
137
+ IFS=$'\n' read -r -d '' -a priorities_arr <<< "${pool_priorities}" || true
138
+ IFS=$'\n' read -r -d '' -a counts_arr <<< "${pool_instance_counts}" || true
139
+
140
+ for i in "${!types_arr[@]}"; do
141
+ local_type="${types_arr[$i]}"
142
+ local_priority="${priorities_arr[$i]:-$((i+1))}"
143
+ local_count="${counts_arr[$i]:-0}"
144
+
145
+ # Mark pools with instances > 0 as active
146
+ if [ "${local_count}" -gt 0 ] 2>/dev/null; then
147
+ printf " Priority %s: %-20s (%s instances) ← active\n" "${local_priority}" "${local_type}" "${local_count}"
148
+ else
149
+ printf " Priority %s: %-20s (%s instances)\n" "${local_priority}" "${local_type}" "${local_count}"
150
+ fi
151
+ done
152
+ fi
153
+ else
154
+ # Standard single instance type path
155
+ if [ -n "${TOTAL_GPUS}" ]; then
156
+ echo "Instance: ${EP_INSTANCE_TYPE} (${EP_INSTANCE_COUNT} instance, ${TOTAL_GPUS} GPUs)"
157
+ else
158
+ echo "Instance: ${EP_INSTANCE_TYPE} (${EP_INSTANCE_COUNT} instance)"
159
+ fi
160
+ fi
161
+ echo ""
162
+
163
+ # ============================================================
164
+ # Describe Inference Components
165
+ # ============================================================
166
+ TOTAL_GPU_USED=0
167
+ IC_ROWS=""
168
+
169
+ if [ -d "${SCRIPT_DIR}/ic" ]; then
170
+ # Multi-IC path: iterate do/ic/*.conf files
171
+ # NOTE: Only base ICs in do/ic/ are counted toward GPU usage.
172
+ # Adapter ICs (do/adapters/*.conf) share the base IC's GPU resources
173
+ # and do not have their own ComputeResourceRequirements, so they are
174
+ # intentionally excluded from GPU capacity calculations.
175
+ HAS_ICS=false
176
+
177
+ for conf in "${SCRIPT_DIR}"/ic/*.conf; do
178
+ [ -f "${conf}" ] || continue
179
+ HAS_ICS=true
180
+
181
+ ic_basename=$(basename "${conf}" .conf)
182
+
183
+ # Read IC_DEPLOYED_NAME from the conf file
184
+ ic_deployed_name=""
185
+ if grep -q "^export IC_DEPLOYED_NAME=" "${conf}" 2>/dev/null; then
186
+ ic_deployed_name=$(grep "^export IC_DEPLOYED_NAME=" "${conf}" | sed 's/^export IC_DEPLOYED_NAME="//' | sed 's/"$//')
187
+ fi
188
+
189
+ if [ -z "${ic_deployed_name}" ]; then
190
+ IC_ROWS="${IC_ROWS}$(printf "%-18s %-12s %-6s %-6s" "${ic_basename}" "Not Deployed" "-" "-")\n"
191
+ continue
192
+ fi
193
+
194
+ # Call DescribeInferenceComponent
195
+ IC_JSON=$(aws sagemaker describe-inference-component \
196
+ --inference-component-name "${ic_deployed_name}" \
197
+ --region "${AWS_REGION}" 2>/dev/null) || {
198
+ IC_ROWS="${IC_ROWS}$(printf "%-18s %-12s %-6s %-6s" "${ic_basename}" "Not Found" "-" "-")\n"
199
+ continue
200
+ }
201
+
202
+ ic_status=$(echo "${IC_JSON}" | grep -o '"InferenceComponentStatus":"[^"]*"' | head -1 | cut -d'"' -f4)
203
+ ic_status="${ic_status:-Unknown}"
204
+
205
+ ic_gpu_count=$(echo "${IC_JSON}" | grep -o '"NumberOfAcceleratorDevicesRequired":[0-9]*' | head -1 | cut -d':' -f2)
206
+ ic_gpu_count="${ic_gpu_count:-0}"
207
+
208
+ ic_copy_count=$(echo "${IC_JSON}" | grep -o '"DesiredCopyCount":[0-9]*' | head -1 | cut -d':' -f2)
209
+ if [ -z "${ic_copy_count}" ]; then
210
+ ic_copy_count=$(echo "${IC_JSON}" | grep -o '"CurrentCopyCount":[0-9]*' | head -1 | cut -d':' -f2)
211
+ fi
212
+ ic_copy_count="${ic_copy_count:-1}"
213
+
214
+ TOTAL_GPU_USED=$(( TOTAL_GPU_USED + ic_gpu_count ))
215
+
216
+ IC_ROWS="${IC_ROWS}$(printf "%-18s %-12s %-6s %-6s" "${ic_basename}" "${ic_status}" "${ic_gpu_count}" "${ic_copy_count}")\n"
217
+ done
218
+
219
+ if [ "${HAS_ICS}" = true ]; then
220
+ echo "Inference Components:"
221
+ printf "%-18s %-12s %-6s %-6s\n" "NAME" "STATUS" "GPUs" "COPIES"
222
+ echo -e "${IC_ROWS}" | head -n -1
223
+ echo " ─────"
224
+ if [ -n "${TOTAL_GPUS}" ]; then
225
+ printf "Total GPU usage: %s/%s\n" "${TOTAL_GPU_USED}" "${TOTAL_GPUS}"
226
+ else
227
+ printf "Total GPU usage: %s\n" "${TOTAL_GPU_USED}"
228
+ fi
229
+ else
230
+ echo "No IC config files found in do/ic/"
231
+ fi
232
+ else
233
+ # Legacy single-IC path: use INFERENCE_COMPONENT_NAME from config
234
+ ic_name="${INFERENCE_COMPONENT_NAME:-}"
235
+
236
+ if [ -z "${ic_name}" ]; then
237
+ echo "No inference component deployed."
238
+ echo "Run ./do/deploy to create one."
239
+ else
240
+ IC_JSON=$(aws sagemaker describe-inference-component \
241
+ --inference-component-name "${ic_name}" \
242
+ --region "${AWS_REGION}" 2>/dev/null) || {
243
+ echo "Inference Components:"
244
+ printf "%-18s %-12s %-6s %-6s\n" "NAME" "STATUS" "GPUs" "COPIES"
245
+ printf "%-18s %-12s %-6s %-6s\n" "default" "Not Found" "-" "-"
246
+ echo ""
247
+ exit 0
248
+ }
249
+
250
+ ic_status=$(echo "${IC_JSON}" | grep -o '"InferenceComponentStatus":"[^"]*"' | head -1 | cut -d'"' -f4)
251
+ ic_status="${ic_status:-Unknown}"
252
+
253
+ ic_gpu_count=$(echo "${IC_JSON}" | grep -o '"NumberOfAcceleratorDevicesRequired":[0-9]*' | head -1 | cut -d':' -f2)
254
+ ic_gpu_count="${ic_gpu_count:-0}"
255
+
256
+ ic_copy_count=$(echo "${IC_JSON}" | grep -o '"DesiredCopyCount":[0-9]*' | head -1 | cut -d':' -f2)
257
+ if [ -z "${ic_copy_count}" ]; then
258
+ ic_copy_count=$(echo "${IC_JSON}" | grep -o '"CurrentCopyCount":[0-9]*' | head -1 | cut -d':' -f2)
259
+ fi
260
+ ic_copy_count="${ic_copy_count:-1}"
261
+
262
+ TOTAL_GPU_USED=$(( TOTAL_GPU_USED + ic_gpu_count ))
263
+
264
+ echo "Inference Components:"
265
+ printf "%-18s %-12s %-6s %-6s\n" "NAME" "STATUS" "GPUs" "COPIES"
266
+ printf "%-18s %-12s %-6s %-6s\n" "default" "${ic_status}" "${ic_gpu_count}" "${ic_copy_count}"
267
+ echo " ─────"
268
+ if [ -n "${TOTAL_GPUS}" ]; then
269
+ printf "Total GPU usage: %s/%s\n" "${TOTAL_GPU_USED}" "${TOTAL_GPUS}"
270
+ else
271
+ printf "Total GPU usage: %s\n" "${TOTAL_GPU_USED}"
272
+ fi
273
+ fi
274
+ fi
275
+
276
+ echo ""
277
+
278
+ <% if (typeof enableLora !== 'undefined' && enableLora) { %>
279
+ # ============================================================
280
+ # Describe LoRA Adapters
281
+ # ============================================================
282
+ if [ "${ENABLE_LORA:-}" = "true" ]; then
283
+ # List all inference components on the endpoint
284
+ ADAPTER_IC_LIST=$(aws sagemaker list-inference-components \
285
+ --endpoint-name-equals "${ENDPOINT_NAME}" \
286
+ --region "${AWS_REGION}" 2>/dev/null) || ADAPTER_IC_LIST=""
287
+
288
+ if [ -n "${ADAPTER_IC_LIST}" ]; then
289
+ # Extract IC names
290
+ ADAPTER_IC_NAMES=$(echo "${ADAPTER_IC_LIST}" | jq -r '.InferenceComponents[].InferenceComponentName' 2>/dev/null)
291
+
292
+ # Filter to adapter ICs (those with BaseInferenceComponentName) and collect details
293
+ ADAPTER_ROWS=""
294
+ ADAPTER_COUNT=0
295
+
296
+ for ic_name in ${ADAPTER_IC_NAMES}; do
297
+ # Describe each IC to check if it's an adapter
298
+ ic_detail=$(aws sagemaker describe-inference-component \
299
+ --inference-component-name "${ic_name}" \
300
+ --region "${AWS_REGION}" 2>/dev/null) || continue
301
+
302
+ # Check if this IC has a BaseInferenceComponentName (adapter IC)
303
+ base_ic=$(echo "${ic_detail}" | jq -r '.Specification.BaseInferenceComponentName // empty' 2>/dev/null)
304
+
305
+ if [ -z "${base_ic}" ]; then
306
+ # Not an adapter IC — skip
307
+ continue
308
+ fi
309
+
310
+ # Extract status and artifact URL
311
+ adapter_status=$(echo "${ic_detail}" | jq -r '.InferenceComponentStatus // "Unknown"' 2>/dev/null)
312
+ adapter_weights=$(echo "${ic_detail}" | jq -r '.Specification.Container.ArtifactUrl // "N/A"' 2>/dev/null)
313
+
314
+ # Derive display name (strip project prefix if present)
315
+ display_name="${ic_name}"
316
+ if [[ "${ic_name}" == "${PROJECT_NAME}-adapter-"* ]]; then
317
+ display_name="${ic_name#${PROJECT_NAME}-adapter-}"
318
+ fi
319
+
320
+ ADAPTER_ROWS="${ADAPTER_ROWS}$(printf '%-14s%-12s%s' "${display_name}" "${adapter_status}" "${adapter_weights}")\n"
321
+ ADAPTER_COUNT=$((ADAPTER_COUNT + 1))
322
+ done
323
+
324
+ echo "Adapters (LoRA): [max: <%= maxLoras %> GPU / 70 CPU]"
325
+ if [ "${ADAPTER_COUNT}" -eq 0 ]; then
326
+ echo " No adapters deployed"
327
+ else
328
+ printf '%-14s%-12s%s\n' "NAME" "STATUS" "WEIGHTS"
329
+ echo -e "${ADAPTER_ROWS}" | head -n -1
330
+ fi
331
+ else
332
+ echo "Adapters (LoRA): [max: <%= maxLoras %> GPU / 70 CPU]"
333
+ echo " No adapters deployed"
334
+ fi
335
+ echo ""
336
+ fi
337
+ <% } %>
package/templates/do/test CHANGED
@@ -15,10 +15,11 @@ source "${SCRIPT_DIR}/config"
15
15
  # SageMaker Real-Time Inference Testing
16
16
  # ============================================================
17
17
 
18
- # Parse arguments
19
- ENDPOINT_NAME="${1:-${ENDPOINT_NAME:-}}"
18
+ # Parse arguments: ./do/test [<ic-name>]
19
+ IC_ARG="${1:-}"
20
20
 
21
- if [ -z "${ENDPOINT_NAME}" ]; then
21
+ # Determine test mode based on ENDPOINT_NAME in config
22
+ if [ -z "${ENDPOINT_NAME:-}" ]; then
22
23
  echo "🧪 Testing local container at localhost:8080"
23
24
  echo " Project: ${PROJECT_NAME}"
24
25
  echo " Framework: ${FRAMEWORK}"
@@ -210,8 +211,53 @@ else
210
211
  # Create temporary file for response
211
212
  TEMP_RESPONSE=$(mktemp)
212
213
 
213
- # Invoke endpoint via inference component
214
- IC_NAME="${INFERENCE_COMPONENT_NAME:-}"
214
+ # Resolve inference component name
215
+ # Precedence: do/adapters/ → do/ic/ → legacy config
216
+ IC_NAME=""
217
+ if [ -n "${IC_ARG}" ] && [ -f "${SCRIPT_DIR}/adapters/${IC_ARG}.conf" ]; then
218
+ # Argument matches an adapter name — use adapter IC
219
+ ADAPTER_IC_NAME=""
220
+ source "${SCRIPT_DIR}/adapters/${IC_ARG}.conf"
221
+ if [ -z "${ADAPTER_IC_NAME}" ]; then
222
+ echo "❌ Adapter '${IC_ARG}' conf is missing ADAPTER_IC_NAME."
223
+ exit 1
224
+ fi
225
+ IC_NAME="${ADAPTER_IC_NAME}"
226
+ elif [ -n "${IC_ARG}" ]; then
227
+ # Explicit IC name provided as argument
228
+ IC_CONF="${SCRIPT_DIR}/ic/${IC_ARG}.conf"
229
+ if [ ! -f "${IC_CONF}" ]; then
230
+ echo "❌ IC config not found: do/ic/${IC_ARG}.conf"
231
+ exit 1
232
+ fi
233
+ IC_DEPLOYED_NAME=""
234
+ source "${IC_CONF}"
235
+ if [ -z "${IC_DEPLOYED_NAME}" ]; then
236
+ echo "❌ IC '${IC_ARG}' has not been deployed yet. Run ./do/deploy --ic ${IC_ARG} first."
237
+ exit 1
238
+ fi
239
+ IC_NAME="${IC_DEPLOYED_NAME}"
240
+ elif [ -d "${SCRIPT_DIR}/ic" ]; then
241
+ # No argument, but do/ic/ exists — use first IC alphabetically
242
+ IC_NAME=""
243
+ for conf in "${SCRIPT_DIR}"/ic/*.conf; do
244
+ [ -f "${conf}" ] || continue
245
+ IC_DEPLOYED_NAME=""
246
+ source "${conf}"
247
+ if [ -n "${IC_DEPLOYED_NAME}" ]; then
248
+ IC_NAME="${IC_DEPLOYED_NAME}"
249
+ break
250
+ fi
251
+ done
252
+ if [ -z "${IC_NAME}" ]; then
253
+ echo "❌ No ICs deployed. Run ./do/deploy first."
254
+ exit 1
255
+ fi
256
+ else
257
+ # Legacy: no do/ic/ directory, use INFERENCE_COMPONENT_NAME from do/config
258
+ IC_NAME="${INFERENCE_COMPONENT_NAME:-}"
259
+ fi
260
+
215
261
  INVOKE_ARGS=(
216
262
  --endpoint-name "${ENDPOINT_NAME}"
217
263
  --region "${AWS_REGION}"
@@ -285,12 +331,15 @@ if [ "${TEST_MODE}" = "local" ]; then
285
331
  echo " • Push to ECR: ./do/push"
286
332
  echo " • Deploy to SageMaker: ./do/deploy"
287
333
  else
288
- echo "Endpoint is ready for production use!"
289
- echo " • Endpoint name: ${ENDPOINT_NAME}"
290
- echo " Region: ${AWS_REGION}"
291
- echo ""
292
- echo "📝 Register this deployment:"
293
- echo " ./do/register"
334
+ echo "📋 What's next?"
335
+ <% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
336
+ echo " Benchmark performance: ./do/benchmark"
337
+ <% } %>
338
+ <% if (typeof enableLora !== 'undefined' && enableLora) { %>
339
+ echo " • Add a LoRA adapter: ./do/adapter add <name> --weights s3://..."
340
+ <% } %>
341
+ echo " • Register this deployment: ./do/register"
342
+ echo " • View logs: ./do/logs"
294
343
  fi
295
344
 
296
345
  <% } else if (deploymentTarget === 'async-inference') { %>
@@ -599,13 +648,13 @@ if [ "${TEST_MODE}" = "local" ]; then
599
648
  echo " • Push to ECR: ./do/push"
600
649
  echo " • Deploy to SageMaker: ./do/deploy"
601
650
  else
602
- echo "Async endpoint is ready for production use!"
603
- echo " • Endpoint name: ${ENDPOINT_NAME}"
604
- echo " Region: ${AWS_REGION}"
605
- echo " • S3 output: ${ASYNC_S3_OUTPUT_PATH}"
606
- echo ""
607
- echo "📝 Register this deployment:"
608
- echo " ./do/register"
651
+ echo "📋 What's next?"
652
+ <% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
653
+ echo " Benchmark performance: ./do/benchmark"
654
+ <% } %>
655
+ echo " • Check async output: aws s3 ls ${ASYNC_S3_OUTPUT_PATH}"
656
+ echo " Register this deployment: ./do/register"
657
+ echo " • View logs: ./do/logs"
609
658
  fi
610
659
 
611
660
  <% } else if (deploymentTarget === 'hyperpod-eks') { %>
@@ -864,13 +913,14 @@ if [ "${TEST_TARGET}" = "local" ]; then
864
913
  echo " • Push to ECR: ./do/push"
865
914
  echo " • Deploy to HyperPod: ./do/deploy"
866
915
  else
867
- echo "HyperPod deployment is ready for production use!"
868
- echo " • Cluster: ${HYPERPOD_CLUSTER_NAME}"
869
- echo " Namespace: ${HYPERPOD_NAMESPACE}"
870
- echo " • Service: ${PROJECT_NAME}"
871
- echo ""
872
- echo "📝 Register this deployment:"
873
- echo " ./do/register"
916
+ echo "📋 What's next?"
917
+ <% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
918
+ echo " Benchmark performance: ./do/benchmark"
919
+ <% } %>
920
+ echo " • Check pod status: kubectl get pods -n ${HYPERPOD_NAMESPACE}"
921
+ echo " View pod logs: kubectl logs -n ${HYPERPOD_NAMESPACE} -l app=${PROJECT_NAME}"
922
+ echo " • Register this deployment: ./do/register"
923
+ echo " • View logs: ./do/logs"
874
924
  fi
875
925
 
876
926
  <% } else if (deploymentTarget === 'batch-transform') { %>
@@ -950,8 +1000,10 @@ case "${TEST_TARGET}" in
950
1000
  echo ""
951
1001
  echo "✅ All tests passed!"
952
1002
  echo ""
953
- echo "📝 Register this deployment:"
954
- echo " ./do/register"
1003
+ echo "📋 What's next?"
1004
+ echo " • View results: cat batch-output/"
1005
+ echo " • Register this deployment: ./do/register"
1006
+ echo " • View logs: ./do/logs"
955
1007
  ;;
956
1008
  InProgress)
957
1009
  echo "⏳ Transform job is still in progress"
@@ -1144,4 +1196,4 @@ echo "Next steps:"
1144
1196
  echo " • Push to ECR: ./do/push"
1145
1197
  echo " • Deploy batch transform: ./do/deploy"
1146
1198
 
1147
- <% } %>
1199
+ <% } %>