@aws/ml-container-creator 0.13.4 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/README.md +23 -5
  2. package/config/parameter-schema-v2.json +32 -4
  3. package/infra/ci-harness/lib/ci-harness-stack.ts +13 -5
  4. package/infra/ci-harness/package-lock.json +122 -116
  5. package/infra/ci-harness/package.json +1 -1
  6. package/package.json +5 -3
  7. package/pyproject.toml +21 -0
  8. package/requirements.txt +19 -0
  9. package/servers/instance-sizer/index.js +72 -4
  10. package/servers/instance-sizer/lib/model-resolver.js +28 -2
  11. package/src/app.js +17 -0
  12. package/src/lib/bootstrap-command-handler.js +33 -23
  13. package/src/lib/config-loader.js +18 -0
  14. package/src/lib/config-manager.js +6 -1
  15. package/src/lib/dataset-slug.js +152 -0
  16. package/src/lib/generated/cli-options.js +9 -3
  17. package/src/lib/generated/parameter-matrix.js +14 -3
  18. package/src/lib/generated/validation-rules.js +1 -1
  19. package/src/lib/mcp-query-runner.js +6 -0
  20. package/src/lib/prompt-runner.js +5 -0
  21. package/src/lib/prompts/feature-prompts.js +1 -1
  22. package/src/lib/template-manager.js +0 -7
  23. package/src/lib/template-variable-resolver.js +51 -1
  24. package/src/lib/tune-config-state.js +14 -1
  25. package/templates/do/.adapter_helper.py +451 -0
  26. package/templates/do/.benchmark_writer.py +22 -0
  27. package/templates/do/.register_helper.py +1163 -0
  28. package/templates/do/.stage_helper.py +419 -0
  29. package/templates/do/.tune_helper.py +379 -65
  30. package/templates/do/__pycache__/.adapter_helper.cpython-312.pyc +0 -0
  31. package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
  32. package/templates/do/__pycache__/.register_helper.cpython-312.pyc +0 -0
  33. package/templates/do/__pycache__/.tune_helper.cpython-312.pyc +0 -0
  34. package/templates/do/adapter +427 -27
  35. package/templates/do/add-ic +85 -3
  36. package/templates/do/benchmark +173 -15
  37. package/templates/do/config +24 -0
  38. package/templates/do/lib/inference-component.sh +56 -3
  39. package/templates/do/lib/profile.sh +5 -0
  40. package/templates/do/register +552 -6
  41. package/templates/do/stage +91 -272
  42. package/templates/do/test +12 -2
  43. package/templates/do/tune +264 -12
@@ -3,18 +3,17 @@
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
 
5
5
  # do/stage — Pre-stage model weights from HuggingFace to S3
6
- # Downloads the model using huggingface-cli and syncs to S3 so that
7
- # vLLM can load directly from S3 at deploy time (fast cold-start).
6
+ # Submits a SageMaker Processing Job that downloads from HuggingFace
7
+ # and writes directly to S3 — no local disk usage.
8
8
  #
9
9
  # Idempotent: if the model is already staged (config.json exists at
10
10
  # the target S3 path), the script exits early.
11
11
  #
12
12
  # Usage:
13
- # ./do/stage Stage model to S3
13
+ # ./do/stage Submit Processing Job to stage model (default)
14
+ # ./do/stage --local Download locally then sync to S3
15
+ # ./do/stage --no-wait Submit and exit without polling
14
16
  # ./do/stage --force Re-stage even if already present in S3
15
- # ./do/stage --update-config Stage and update MODEL_NAME in do/config
16
- # ./do/stage --submit Submit as SageMaker Processing Job (for models >500GB)
17
- # ./do/stage --submit --no-wait Submit and exit without polling
18
17
 
19
18
  set -e
20
19
  set -u
@@ -29,30 +28,32 @@ source "${SCRIPT_DIR}/lib/staged-assets.sh"
29
28
  # ── Parse flags ───────────────────────────────────────────────────────────────
30
29
  FORCE=false
31
30
  UPDATE_CONFIG=true
32
- SUBMIT_MODE=false
31
+ LOCAL_MODE=false
33
32
  NO_WAIT=false
34
33
  while [ $# -gt 0 ]; do
35
34
  case "$1" in
36
35
  --force) FORCE=true; shift ;;
37
36
  --update-config) UPDATE_CONFIG=true; shift ;; # default, kept for backward compat
38
37
  --no-update-config) UPDATE_CONFIG=false; shift ;;
39
- --submit) SUBMIT_MODE=true; shift ;;
38
+ --local) LOCAL_MODE=true; shift ;;
39
+ --submit) shift ;; # Deprecated — now the default; kept for backward compat
40
40
  --no-wait) NO_WAIT=true; shift ;;
41
41
  --help|-h)
42
- echo "Usage: ./do/stage [--force] [--no-update-config] [--submit] [--no-wait]"
42
+ echo "Usage: ./do/stage [--force] [--local] [--no-wait] [--no-update-config]"
43
43
  echo ""
44
44
  echo "Pre-stage model weights from HuggingFace to S3."
45
45
  echo "On success, updates MODEL_NAME in do/config so subsequent tasks"
46
46
  echo "(submit, deploy) pull from S3 with HuggingFace as fallback."
47
47
  echo ""
48
48
  echo "Modes:"
49
- echo " (default) Download locally then sync to S3"
50
- echo " --submit Submit as SageMaker Processing Job (for models >500GB)"
49
+ echo " (default) Submit SageMaker Processing Job (no local disk usage)"
50
+ echo " --local Download locally then sync to S3 (legacy behavior)"
51
+ echo " --submit Deprecated — Processing Job is now the default"
51
52
  echo ""
52
53
  echo "Options:"
53
54
  echo " --force Re-stage even if model already exists in S3"
54
55
  echo " --no-update-config Do NOT update MODEL_NAME in do/config after staging"
55
- echo " --no-wait (with --submit) Exit without polling for completion"
56
+ echo " --no-wait Return immediately with job name (Processing Job mode)"
56
57
  echo ""
57
58
  echo "Environment:"
58
59
  echo " HF_TOKEN HuggingFace token (for gated models)"
@@ -65,10 +66,7 @@ while [ $# -gt 0 ]; do
65
66
  esac
66
67
  done
67
68
 
68
- # ── Processing Job submission function ────────────────────────────────────────
69
- # Submits a SageMaker Processing Job that downloads model weights from HuggingFace
70
- # and syncs them to S3. Uses 2TB attached storage to handle any model size.
71
- POLL_INTERVAL=30
69
+ # ── Processing Job submission via .stage_helper.py ────────────────────────────
72
70
  PROCESSING_JOB_INSTANCE_TYPE="ml.m5.xlarge"
73
71
  PROCESSING_JOB_VOLUME_GB=2048
74
72
 
@@ -80,19 +78,12 @@ _submit_processing_job() {
80
78
  echo " Storage: ${PROCESSING_JOB_VOLUME_GB} GB"
81
79
  echo ""
82
80
 
83
- # Validate AWS credentials
84
- if ! aws sts get-caller-identity &>/dev/null; then
85
- echo "❌ AWS credentials not configured or expired."
86
- echo " Run: aws configure"
87
- exit 4
88
- fi
89
-
90
81
  # Resolve execution role from profile
91
82
  local execution_role
92
83
  execution_role=$(echo "${_PROFILE_JSON}" | python3 -c "
93
84
  import sys, json
94
85
  p = json.load(sys.stdin)
95
- print(p.get('executionRoleArn', ''))
86
+ print(p.get('roleArn', ''))
96
87
  " 2>/dev/null) || execution_role=""
97
88
 
98
89
  if [ -z "${execution_role}" ]; then
@@ -102,267 +93,88 @@ print(p.get('executionRoleArn', ''))
102
93
  exit 1
103
94
  fi
104
95
 
105
- # Resolve HF token ARN for the processing job (optional — for gated models)
96
+ # Resolve HF token (optional — for gated models)
97
+ local hf_token_value=""
106
98
  local hf_token_secret_arn="${HF_TOKEN_ARN:-}"
107
-
108
- # Generate job name with timestamp
109
- local timestamp
110
- timestamp=$(date +%Y%m%d-%H%M%S)
111
- local job_name="mlcc-stage-${PROJECT_NAME}-${timestamp}"
112
- # SageMaker job names max 63 chars, must match [a-zA-Z0-9](-*[a-zA-Z0-9])*
113
- job_name=$(echo "${job_name}" | cut -c1-63 | sed 's/[^a-zA-Z0-9-]/-/g' | sed 's/-*$//')
114
-
115
- echo " Job name: ${job_name}"
116
- echo ""
117
-
118
- # Build the entrypoint script that runs inside the processing container
119
- local entrypoint_script
120
- entrypoint_script=$(cat <<'ENTRYPOINT_EOF'
121
- #!/bin/bash
122
- set -e
123
- set -o pipefail
124
-
125
- echo "=== MCC Model Staging Processing Job ==="
126
- echo "Model: ${MODEL_ID}"
127
- echo "Target: ${S3_OUTPUT_URI}"
128
- echo ""
129
-
130
- # Install dependencies
131
- echo "📦 Checking huggingface-cli and hf_transfer..."
132
- pip install -q huggingface_hub[cli] hf_transfer 2>/dev/null || true
133
-
134
- # Enable fast parallel downloads only if hf_transfer is available
135
- if python3 -c "import hf_transfer" 2>/dev/null; then
136
- export HF_HUB_ENABLE_HF_TRANSFER=1
137
- else
138
- echo " ℹ️ hf_transfer not available — using standard download (install with: pip install hf_transfer)"
139
- unset HF_HUB_ENABLE_HF_TRANSFER 2>/dev/null || true
140
- fi
141
-
142
- # Set HF token if provided
143
- if [ -n "${HF_TOKEN:-}" ]; then
144
- echo "🔐 Using provided HuggingFace token"
145
- fi
146
-
147
- # Download model from HuggingFace
148
- echo ""
149
- echo "⬇️ Downloading model: ${MODEL_ID}"
150
- DOWNLOAD_ARGS="${MODEL_ID}"
151
- if [ -n "${HF_TOKEN:-}" ]; then
152
- DOWNLOAD_ARGS="${DOWNLOAD_ARGS} --token ${HF_TOKEN}"
153
- fi
154
- huggingface-cli download ${DOWNLOAD_ARGS}
155
-
156
- echo ""
157
- echo "✅ Download complete"
158
-
159
- # Locate downloaded files
160
- CACHE_PATH=$(python3 -c "
161
- from huggingface_hub import snapshot_download
162
- path = snapshot_download('${MODEL_ID}', local_files_only=True)
163
- print(path)
164
- ")
165
-
166
- echo "📁 Cache path: ${CACHE_PATH}"
167
-
168
- # Sync to S3
169
- echo ""
170
- echo "☁️ Syncing to S3: ${S3_OUTPUT_URI}"
171
- aws s3 sync "${CACHE_PATH}" "${S3_OUTPUT_URI}" \
172
- --no-progress \
173
- --exclude "*.lock" \
174
- --exclude ".gitattributes"
175
-
176
- echo ""
177
- echo "✅ Model staged successfully to: ${S3_OUTPUT_URI}"
178
- ENTRYPOINT_EOF
179
- )
180
-
181
- # Build environment variables for the container
182
- local env_vars="MODEL_ID=${MODEL_NAME},S3_OUTPUT_URI=${MODEL_S3_URI}"
183
99
  if [ -n "${hf_token_secret_arn}" ]; then
184
- # Resolve token and pass as env var to the job
185
- local hf_token_value=""
186
100
  hf_token_value=$(aws secretsmanager get-secret-value \
187
101
  --secret-id "${hf_token_secret_arn}" \
188
102
  --query SecretString --output text 2>/dev/null) || hf_token_value=""
189
- if [ -n "${hf_token_value}" ]; then
190
- env_vars="${env_vars},HF_TOKEN=${hf_token_value}"
191
- fi
192
103
  elif [ -n "${HF_TOKEN:-}" ]; then
193
- env_vars="${env_vars},HF_TOKEN=${HF_TOKEN}"
104
+ hf_token_value="${HF_TOKEN}"
194
105
  fi
195
106
 
196
- # Write entrypoint to a temp file for the processing job input
197
- local entrypoint_s3_key="staging-jobs/${job_name}/entrypoint.sh"
198
- local entrypoint_s3_uri="s3://${STAGE_S3_BUCKET}/${entrypoint_s3_key}"
199
-
200
- echo "📤 Uploading entrypoint script..."
201
- echo "${entrypoint_script}" | aws s3 cp - "${entrypoint_s3_uri}" --region "${AWS_REGION}"
202
-
203
- # Create the processing job
204
- # Uses a lightweight Python image with AWS CLI pre-installed
205
- local container_image="763104351884.dkr.ecr.${AWS_REGION}.amazonaws.com/pytorch-training:2.1.0-cpu-py310-ubuntu20.04-sagemaker"
206
-
207
- local processing_request
208
- processing_request=$(python3 -c "
209
- import json, sys
210
-
211
- job = {
212
- 'ProcessingJobName': '${job_name}',
213
- 'ProcessingResources': {
214
- 'ClusterConfig': {
215
- 'InstanceCount': 1,
216
- 'InstanceType': '${PROCESSING_JOB_INSTANCE_TYPE}',
217
- 'VolumeSizeInGB': ${PROCESSING_JOB_VOLUME_GB}
218
- }
219
- },
220
- 'AppSpecification': {
221
- 'ImageUri': '${container_image}',
222
- 'ContainerEntrypoint': ['bash', '-c'],
223
- 'ContainerArguments': ['aws s3 cp ${entrypoint_s3_uri} /tmp/entrypoint.sh && chmod +x /tmp/entrypoint.sh && /tmp/entrypoint.sh']
224
- },
225
- 'Environment': dict(item.split('=', 1) for item in '${env_vars}'.split(',')),
226
- 'RoleArn': '${execution_role}',
227
- 'StoppingCondition': {
228
- 'MaxRuntimeInSeconds': 86400
229
- }
230
- }
231
-
232
- print(json.dumps(job, indent=2))
233
- ")
234
-
235
- # Write request JSON to temp file
236
- local request_file="/tmp/mlcc-stage-request-${timestamp}.json"
237
- echo "${processing_request}" > "${request_file}"
238
-
239
- echo "🚀 Creating Processing Job: ${job_name}"
240
- echo ""
241
-
242
- local create_output
243
- local create_exit_code
244
- create_output=$(aws sagemaker create-processing-job \
245
- --cli-input-json "file://${request_file}" \
246
- --region "${AWS_REGION}" 2>&1) || create_exit_code=$?
247
- create_exit_code=${create_exit_code:-0}
107
+ # Build helper arguments
108
+ local helper_args=(
109
+ submit
110
+ --model-name "${MODEL_NAME}"
111
+ --bucket "${STAGE_S3_BUCKET}"
112
+ --project "${PROJECT_NAME}"
113
+ --role-arn "${execution_role}"
114
+ --region "${AWS_REGION}"
115
+ --instance-type "${PROCESSING_JOB_INSTANCE_TYPE}"
116
+ --volume-size-gb "${PROCESSING_JOB_VOLUME_GB}"
117
+ )
118
+ if [ -n "${hf_token_value}" ]; then
119
+ helper_args+=(--hf-token "${hf_token_value}")
120
+ fi
121
+ if [ "${FORCE}" = true ]; then
122
+ helper_args+=(--force)
123
+ fi
124
+ if [ "${NO_WAIT}" = true ]; then
125
+ helper_args+=(--no-wait)
126
+ fi
248
127
 
249
- rm -f "${request_file}"
128
+ # Call .stage_helper.py (sagemaker-core ProcessingJob.create())
129
+ # stdout = JSON result, stderr = progress messages (piped to user)
130
+ local json_output
131
+ local helper_exit_code=0
132
+ json_output=$(python3 "${SCRIPT_DIR}/.stage_helper.py" "${helper_args[@]}") || helper_exit_code=$?
250
133
 
251
- if [ ${create_exit_code} -ne 0 ]; then
252
- echo "❌ Failed to create Processing Job"
253
- echo " ${create_output}"
134
+ if [ ${helper_exit_code} -ne 0 ]; then
254
135
  echo ""
255
- if echo "${create_output}" | grep -q "AccessDeniedException"; then
256
- echo " Remediation: ensure the execution role has sagemaker:CreateProcessingJob permission"
257
- fi
258
- exit 1
136
+ echo " Processing Job failed"
137
+ echo " To retry: ./do/stage --force"
138
+ exit ${helper_exit_code}
259
139
  fi
260
140
 
261
- echo " ✅ Processing Job submitted: ${job_name}"
262
- echo ""
141
+ # Parse JSON output
142
+ local job_status
143
+ local job_name
144
+ local s3_uri
145
+ job_status=$(echo "${json_output}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('status',''))" 2>/dev/null) || job_status=""
146
+ job_name=$(echo "${json_output}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('job_name',''))" 2>/dev/null) || job_name=""
147
+ s3_uri=$(echo "${json_output}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('s3_uri',''))" 2>/dev/null) || s3_uri="${MODEL_S3_URI}"
263
148
 
264
- # Handle --no-wait
265
- if [ "${NO_WAIT}" = true ]; then
149
+ if [ "${job_status}" = "AlreadyStaged" ]; then
150
+ echo "✅ Model already staged at: ${s3_uri}"
151
+ echo " Use --force to re-stage."
152
+ elif [ "${job_status}" = "Submitted" ]; then
153
+ echo " ✅ Processing Job submitted: ${job_name}"
154
+ echo ""
266
155
  echo " --no-wait specified. Job submitted, exiting without polling."
267
156
  echo ""
268
157
  echo " Check status:"
269
- echo " aws sagemaker describe-processing-job --processing-job-name ${job_name} --region ${AWS_REGION}"
158
+ echo " python3 ${SCRIPT_DIR}/.stage_helper.py status --job-name ${job_name}"
270
159
  echo ""
271
160
  echo " On completion, the staged model will be at:"
272
- echo " ${MODEL_S3_URI}"
273
- return 0
161
+ echo " ${s3_uri}"
162
+ elif [ "${job_status}" = "Completed" ]; then
163
+ echo ""
164
+ echo "✅ Processing Job completed: ${job_name}"
165
+ echo ""
166
+ echo " S3 URI: ${s3_uri}"
274
167
  fi
275
168
 
276
- # Poll for completion
277
- _poll_processing_job "${job_name}"
278
- }
279
-
280
- # ── Poll Processing Job status ────────────────────────────────────────────────
281
- _poll_processing_job() {
282
- local job_name="$1"
283
-
284
- echo "⏳ Polling Processing Job status (every ${POLL_INTERVAL}s)..."
285
- echo " (Ctrl+C to stop polling — job continues in background)"
286
- echo ""
287
-
288
- while true; do
289
- local describe_output
290
- local describe_exit_code
291
- describe_output=$(aws sagemaker describe-processing-job \
292
- --processing-job-name "${job_name}" \
293
- --region "${AWS_REGION}" 2>&1) || describe_exit_code=$?
294
- describe_exit_code=${describe_exit_code:-0}
295
-
296
- if [ ${describe_exit_code} -ne 0 ]; then
297
- echo " ⚠️ Failed to describe job (will retry): ${describe_output}"
298
- sleep "${POLL_INTERVAL}"
299
- continue
300
- fi
301
-
302
- # Parse status from response
303
- local job_status
304
- local failure_reason
305
- job_status=$(echo "${describe_output}" | python3 -c "
306
- import sys, json
307
- d = json.load(sys.stdin)
308
- print(d.get('ProcessingJobStatus', 'Unknown'))
309
- " 2>/dev/null) || job_status="Unknown"
310
-
311
- failure_reason=$(echo "${describe_output}" | python3 -c "
312
- import sys, json
313
- d = json.load(sys.stdin)
314
- print(d.get('FailureReason', ''))
315
- " 2>/dev/null) || failure_reason=""
316
-
317
- # Print status
318
- local now
319
- now=$(date +%H:%M:%S)
320
- echo " [${now}] Status: ${job_status}"
321
-
322
- # Handle terminal states
323
- case "${job_status}" in
324
- Completed)
325
- echo ""
326
- echo "✅ Processing Job completed: ${job_name}"
327
- echo ""
328
- echo " S3 URI: ${MODEL_S3_URI}"
329
- echo ""
330
- if [ "${UPDATE_CONFIG}" = true ]; then
331
- CONFIG_FILE="${SCRIPT_DIR}/config"
332
- sed -i.bak "s|^export MODEL_NAME=.*|export MODEL_NAME=\"${MODEL_S3_URI}\"|" "${CONFIG_FILE}"
333
- rm -f "${CONFIG_FILE}.bak"
334
- echo " ✅ Updated MODEL_NAME in do/config → S3-backed"
335
- echo " Subsequent tasks (submit, deploy) will pull from S3."
336
- else
337
- echo " To use this staged model, update do/config:"
338
- echo " export MODEL_NAME=\"${MODEL_S3_URI}\""
339
- fi
340
- return 0
341
- ;;
342
- Failed)
343
- echo ""
344
- echo "❌ Processing Job failed: ${job_name}"
345
- if [ -n "${failure_reason}" ]; then
346
- echo " Reason: ${failure_reason}"
347
- fi
348
- echo ""
349
- echo " Check CloudWatch logs:"
350
- echo " /aws/sagemaker/ProcessingJobs/${job_name}"
351
- echo ""
352
- echo " To retry: ./do/stage --submit --force"
353
- return 1
354
- ;;
355
- Stopped)
356
- echo ""
357
- echo "⏹️ Processing Job was stopped: ${job_name}"
358
- echo ""
359
- echo " To retry: ./do/stage --submit --force"
360
- return 2
361
- ;;
362
- esac
363
-
364
- sleep "${POLL_INTERVAL}"
365
- done
169
+ # Update config if requested, we have an S3 URI, and the job is not merely "Submitted" (--no-wait: staging has not finished yet)
170
+ if [ "${UPDATE_CONFIG}" = true ] && [ -n "${s3_uri}" ] && [ "${job_status}" != "Submitted" ]; then
171
+ CONFIG_FILE="${SCRIPT_DIR}/config"
172
+ sed -i.bak "s|^export MODEL_NAME=.*|export MODEL_NAME=\"${s3_uri}\"|" "${CONFIG_FILE}"
173
+ rm -f "${CONFIG_FILE}.bak"
174
+ echo ""
175
+ echo " ✅ Updated MODEL_NAME in do/config → S3-backed"
176
+ echo " Subsequent tasks (submit, deploy) will pull from S3."
177
+ fi
366
178
  }
367
179
 
368
180
  # ── Check if model is already an S3 URI ──────────────────────────────────────
@@ -410,21 +222,28 @@ if [ -z "${STAGE_S3_BUCKET}" ]; then
410
222
  exit 1
411
223
  fi
412
224
 
413
- # Target S3 path for staged model
414
- MODEL_S3_URI="s3://${STAGE_S3_BUCKET}/models/${PROJECT_NAME}/"
225
+ # Target S3 path for staged model: s3://{bucket}/{project}/models/{model-slug}/
226
+ # Sanitize MODEL_NAME for use as an S3 path segment:
227
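+ # - Replace each / with - (e.g., "nvidia/Nemotron-3-Ultra..." → "nvidia-Nemotron-3-Ultra..."); NOTE(review): the expansion below emits a single "-"; if "--" is intended it must be ${MODEL_NAME//\//--} — confirm against .stage_helper.py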
228
+ # - This prevents HF org/repo IDs from creating nested S3 prefixes
229
+ MODEL_SLUG="${MODEL_NAME//\//-}"
230
+ MODEL_S3_URI="s3://${STAGE_S3_BUCKET}/${PROJECT_NAME}/models/${MODEL_SLUG}/"
415
231
 
416
232
  echo " Target: ${MODEL_S3_URI}"
417
233
  echo ""
418
234
 
419
- # ── Submit mode: SageMaker Processing Job ─────────────────────────────────────
420
- # For very large models (>500GB) that exceed local disk, submit a Processing Job
421
- # with 2TB attached storage. The job downloads from HuggingFace and syncs to S3.
422
- if [ "${SUBMIT_MODE}" = true ]; then
235
+ # ── Default mode: SageMaker Processing Job via .stage_helper.py ───────────────
236
+ # Submits a Processing Job that downloads model weights from HuggingFace and
237
+ # syncs to S3 directly — no local disk usage. Uses sagemaker-core SDK v3.
238
+ if [ "${LOCAL_MODE}" = false ]; then
423
239
  _submit_processing_job
424
240
  exit $?
425
241
  fi
426
242
 
427
- # ── Idempotency: check if model is already staged ────────────────────────────
243
+ # ── Local mode: download locally then sync to S3 (--local flag) ───────────────
244
+ # Preserved for offline work, debugging, or when Processing Jobs are unavailable.
245
+
246
+ # Idempotency: check if model is already staged
428
247
  if [ "${FORCE}" = false ]; then
429
248
  if aws s3 ls "${MODEL_S3_URI}config.json" --region "${AWS_REGION}" &>/dev/null; then
430
249
  echo "✅ Model already staged at: ${MODEL_S3_URI}"
@@ -443,7 +262,7 @@ if [ "${FORCE}" = false ]; then
443
262
  fi
444
263
  fi
445
264
 
446
- # ── Validate prerequisites ───────────────────────────────────────────────────
265
+ # Validate prerequisites
447
266
  if ! command -v huggingface-cli &>/dev/null; then
448
267
  echo "❌ huggingface-cli is not installed"
449
268
  echo " Install: pip install huggingface_hub[cli] hf_transfer"
package/templates/do/test CHANGED
@@ -16,8 +16,18 @@ source "${SCRIPT_DIR}/lib/profile.sh"
16
16
  # SageMaker Real-Time Inference Testing
17
17
  # ============================================================
18
18
 
19
- # Parse arguments: ./do/test [<ic-name>]
20
- IC_ARG="${1:-}"
19
+ # Parse arguments: ./do/test [<ic-name>] or ./do/test --adapter|-a <name>
20
+ IC_ARG=""
21
+ if [ "${1:-}" = "--adapter" ] || [ "${1:-}" = "-a" ]; then
22
+ if [ -z "${2:-}" ]; then
23
+ echo "❌ --adapter requires an adapter name argument"
24
+ echo " Usage: ./do/test --adapter <name>"
25
+ exit 1
26
+ fi
27
+ IC_ARG="$2"
28
+ else
29
+ IC_ARG="${1:-}"
30
+ fi
21
31
 
22
32
  # Determine test mode based on ENDPOINT_NAME in config
23
33
  if [ -z "${ENDPOINT_NAME:-}" ]; then