@aws/ml-container-creator 0.10.3 → 0.13.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/config/parameter-schema-v2.json +28 -1
  2. package/infra/ci-harness/lib/ci-harness-stack.ts +50 -36
  3. package/package.json +14 -5
  4. package/servers/instance-sizer/index.js +30 -17
  5. package/servers/instance-sizer/lib/instance-ranker.js +44 -0
  6. package/servers/lib/catalogs/instances.json +27 -0
  7. package/src/app.js +22 -1
  8. package/src/lib/bootstrap-command-handler.js +32 -3
  9. package/src/lib/config-validator.js +1 -1
  10. package/src/lib/generated/cli-options.js +7 -2
  11. package/src/lib/generated/parameter-matrix.js +16 -5
  12. package/src/lib/generated/validation-rules.js +7 -3
  13. package/src/lib/path-prover-brain.js +58 -1
  14. package/src/lib/prompts/infrastructure-prompts.js +2 -2
  15. package/src/lib/prompts/model-prompts.js +6 -0
  16. package/src/lib/prove-pipeline-executor.js +294 -0
  17. package/src/lib/secrets-prompt-runner.js +4 -0
  18. package/src/lib/template-manager.js +1 -1
  19. package/src/lib/template-variable-resolver.js +62 -0
  20. package/templates/do/README.md +37 -0
  21. package/templates/do/adapter +8 -0
  22. package/templates/do/build +8 -0
  23. package/templates/do/clean.d/async-inference.ejs +8 -0
  24. package/templates/do/clean.d/batch-transform.ejs +8 -0
  25. package/templates/do/clean.d/hyperpod-eks.ejs +8 -0
  26. package/templates/do/clean.d/managed-inference.ejs +8 -0
  27. package/templates/do/config +12 -45
  28. package/templates/do/deploy.d/async-inference.ejs +33 -3
  29. package/templates/do/deploy.d/batch-transform.ejs +32 -3
  30. package/templates/do/deploy.d/hyperpod-eks.ejs +7 -0
  31. package/templates/do/deploy.d/managed-inference.ejs +27 -3
  32. package/templates/do/lib/endpoint-config.sh +1 -1
  33. package/templates/do/lib/profile.sh +44 -0
  34. package/templates/do/lib/staged-assets.sh +217 -0
  35. package/templates/do/push +8 -0
  36. package/templates/do/register +8 -0
  37. package/templates/do/stage +569 -0
  38. package/templates/do/submit +10 -0
  39. package/templates/do/test +1 -0
  40. package/templates/do/tune +7 -0
@@ -0,0 +1,569 @@
1
+ #!/bin/bash
2
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ # do/stage — Pre-stage model weights from HuggingFace to S3
6
+ # Downloads the model using huggingface-cli and syncs to S3 so that
7
+ # vLLM can load directly from S3 at deploy time (fast cold-start).
8
+ #
9
+ # Idempotent: if the model is already staged (config.json exists at
10
+ # the target S3 path), the script exits early.
11
+ #
12
+ # Usage:
13
+ # ./do/stage Stage model to S3
14
+ # ./do/stage --force Re-stage even if already present in S3
15
+ # ./do/stage --update-config Stage and update MODEL_NAME in do/config
16
+ # ./do/stage --submit Submit as SageMaker Processing Job (for models >500GB)
17
+ # ./do/stage --submit --no-wait Submit and exit without polling
18
+
19
+ set -e
20
+ set -u
21
+ set -o pipefail
22
+
23
+ # ── Source project configuration ──────────────────────────────────────────────
24
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
25
+ source "${SCRIPT_DIR}/config"
26
+ source "${SCRIPT_DIR}/lib/profile.sh"
27
+ source "${SCRIPT_DIR}/lib/staged-assets.sh"
28
+
29
+ # ── Parse flags ───────────────────────────────────────────────────────────────
30
# Flag defaults. Each one is switched to "true" by the matching CLI option.
FORCE=false
UPDATE_CONFIG=false
SUBMIT_MODE=false
NO_WAIT=false

#######################################
# Print the help text for do/stage to stdout.
#######################################
_stage_usage() {
  echo "Usage: ./do/stage [--force] [--update-config] [--submit] [--no-wait]"
  echo ""
  echo "Pre-stage model weights from HuggingFace to S3."
  echo ""
  echo "Modes:"
  echo " (default) Download locally then sync to S3"
  echo " --submit Submit as SageMaker Processing Job (for models >500GB)"
  echo ""
  echo "Options:"
  echo " --force Re-stage even if model already exists in S3"
  echo " --update-config Update MODEL_NAME in do/config to the staged S3 URI"
  echo " --no-wait (with --submit) Exit without polling for completion"
  echo ""
  echo "Environment:"
  echo " HF_TOKEN HuggingFace token (for gated models)"
  echo ""
  echo "The staged S3 URI will be printed on completion."
  echo "Pass --update-config to automatically update do/config for S3-backed deploys."
  echo ""
  echo "The --submit mode uses a SageMaker Processing Job with 2TB attached"
  echo "storage, suitable for very large models that exceed local disk capacity."
}

#######################################
# Parse command-line options into the global flag variables.
# Globals:   FORCE, UPDATE_CONFIG, SUBMIT_MODE, NO_WAIT (written)
# Arguments: the script's positional parameters
# Outputs:   help text on stdout for --help/-h (then exits 0);
#            a warning on stderr for every unrecognized argument
#######################################
_parse_stage_flags() {
  while [ $# -gt 0 ]; do
    case "$1" in
      --force) FORCE=true ;;
      --update-config) UPDATE_CONFIG=true ;;
      --submit) SUBMIT_MODE=true ;;
      --no-wait) NO_WAIT=true ;;
      --help|-h)
        _stage_usage
        exit 0
        ;;
      *)
        # Unknown arguments are still skipped (backward compatible), but no
        # longer silently: a typo such as --froce would otherwise go unnoticed.
        echo "⚠️ Ignoring unrecognized argument: $1" >&2
        ;;
    esac
    shift
  done
}

_parse_stage_flags "$@"
67
+
68
+ # ── Processing Job submission function ────────────────────────────────────────
69
+ # Submits a SageMaker Processing Job that downloads model weights from HuggingFace
70
+ # and syncs them to S3. Uses 2TB attached storage to handle any model size.
71
+ POLL_INTERVAL=30
72
+ PROCESSING_JOB_INSTANCE_TYPE="ml.m5.xlarge"
73
+ PROCESSING_JOB_VOLUME_GB=2048
74
+
75
#######################################
# Submit a SageMaker Processing Job that downloads the model from HuggingFace
# inside the job container and syncs it to S3, then (unless --no-wait was
# given) poll the job until it reaches a terminal state.
# Globals (read): MODEL_NAME, MODEL_S3_URI, PROJECT_NAME, STAGE_S3_BUCKET,
#                 AWS_REGION, NO_WAIT, _PROFILE_JSON,
#                 PROCESSING_JOB_INSTANCE_TYPE, PROCESSING_JOB_VOLUME_GB,
#                 HF_TOKEN_ARN (optional), HF_TOKEN (optional)
# Outputs:  progress messages on stdout
# Returns:  0 when the job was submitted with --no-wait, otherwise the status
#           of _poll_processing_job
# Exits:    4 when AWS credentials are unusable; 1 when no execution role is
#           configured or the job could not be created
#######################################
_submit_processing_job() {
  echo "🚀 Submitting SageMaker Processing Job for model staging"
  echo " Model: ${MODEL_NAME}"
  echo " Target: ${MODEL_S3_URI}"
  echo " Instance: ${PROCESSING_JOB_INSTANCE_TYPE}"
  echo " Storage: ${PROCESSING_JOB_VOLUME_GB} GB"
  echo ""

  # Validate AWS credentials
  if ! aws sts get-caller-identity &>/dev/null; then
    echo "❌ AWS credentials not configured or expired."
    echo " Run: aws configure"
    exit 4
  fi

  # Resolve execution role from profile
  local execution_role
  execution_role=$(printf '%s\n' "${_PROFILE_JSON}" \
    | python3 -c 'import sys, json; print(json.load(sys.stdin).get("executionRoleArn", ""))' \
      2>/dev/null) || execution_role=""

  if [ -z "${execution_role}" ]; then
    echo "❌ No execution role configured."
    echo " Run 'ml-container-creator bootstrap' to set up your profile."
    echo " The role needs: SageMaker, S3, and Secrets Manager permissions."
    exit 1
  fi

  # Resolve HF token ARN for the processing job (optional — for gated models)
  local hf_token_secret_arn="${HF_TOKEN_ARN:-}"

  # Build the job name. SageMaker job names are at most 63 characters and must
  # match [a-zA-Z0-9](-*[a-zA-Z0-9])*. The project part is sanitized and capped
  # at 36 characters so that "mlcc-stage-" (11) + project + "-" + timestamp (15)
  # always fits and the unique timestamp suffix is never truncated away.
  local timestamp project_slug job_name
  timestamp=$(date +%Y%m%d-%H%M%S)
  project_slug=$(printf '%s' "${PROJECT_NAME}" \
    | LC_ALL=C sed 's/[^a-zA-Z0-9-]/-/g' \
    | cut -c1-36 \
    | sed 's/-*$//')
  job_name="mlcc-stage-${project_slug}-${timestamp}"

  echo " Job name: ${job_name}"
  echo ""

  # Build the entrypoint script that runs inside the processing container.
  # The heredoc delimiter is quoted, so everything below is literal here and is
  # only expanded by bash inside the container (MODEL_ID, S3_OUTPUT_URI and
  # HF_TOKEN come from the job's Environment map).
  local entrypoint_script
  entrypoint_script=$(cat <<'ENTRYPOINT_EOF'
#!/bin/bash
set -e
set -o pipefail

echo "=== MCC Model Staging Processing Job ==="
echo "Model: ${MODEL_ID}"
echo "Target: ${S3_OUTPUT_URI}"
echo ""

# Install dependencies
echo "📦 Installing huggingface-cli and hf_transfer..."
pip install -q "huggingface_hub[cli]" hf_transfer

# Enable fast parallel downloads
export HF_HUB_ENABLE_HF_TRANSFER=1

# Set HF token if provided
if [ -n "${HF_TOKEN:-}" ]; then
  echo "🔐 Using provided HuggingFace token"
fi

# Download model from HuggingFace
echo ""
echo "⬇️ Downloading model: ${MODEL_ID}"
DOWNLOAD_ARGS=("${MODEL_ID}")
if [ -n "${HF_TOKEN:-}" ]; then
  DOWNLOAD_ARGS+=(--token "${HF_TOKEN}")
fi
huggingface-cli download "${DOWNLOAD_ARGS[@]}"

echo ""
echo "✅ Download complete"

# Locate downloaded files (model id is passed as an argument, not spliced
# into the Python source)
CACHE_PATH=$(python3 -c "
import sys
from huggingface_hub import snapshot_download
print(snapshot_download(sys.argv[1], local_files_only=True))
" "${MODEL_ID}")

echo "📁 Cache path: ${CACHE_PATH}"

# Sync to S3
echo ""
echo "☁️ Syncing to S3: ${S3_OUTPUT_URI}"
aws s3 sync "${CACHE_PATH}" "${S3_OUTPUT_URI}" \
  --no-progress \
  --exclude "*.lock" \
  --exclude ".gitattributes"

echo ""
echo "✅ Model staged successfully to: ${S3_OUTPUT_URI}"
ENTRYPOINT_EOF
  )

  # Resolve the HuggingFace token that is handed to the container, if any.
  # A configured secret ARN takes precedence over a local HF_TOKEN.
  # NOTE(review): the token ends up in the job's Environment map, which is
  # returned by describe-processing-job — confirm this exposure is acceptable.
  local hf_token_value=""
  if [ -n "${hf_token_secret_arn}" ]; then
    hf_token_value=$(aws secretsmanager get-secret-value \
      --secret-id "${hf_token_secret_arn}" \
      --query SecretString --output text 2>/dev/null) || hf_token_value=""
  elif [ -n "${HF_TOKEN:-}" ]; then
    hf_token_value="${HF_TOKEN}"
  fi

  # Upload the entrypoint so the container can fetch and execute it
  local entrypoint_s3_key="staging-jobs/${job_name}/entrypoint.sh"
  local entrypoint_s3_uri="s3://${STAGE_S3_BUCKET}/${entrypoint_s3_key}"

  echo "📤 Uploading entrypoint script..."
  printf '%s\n' "${entrypoint_script}" \
    | aws s3 cp - "${entrypoint_s3_uri}" --region "${AWS_REGION}"

  # Create the processing job
  # Uses a lightweight Python image with AWS CLI pre-installed
  local container_image="763104351884.dkr.ecr.${AWS_REGION}.amazonaws.com/pytorch-training:2.1.0-cpu-py310-ubuntu20.04-sagemaker"

  # Build the request JSON. Every value is passed to Python through the
  # environment instead of being interpolated into the source, so quotes,
  # commas or '=' inside a value (notably the token) cannot corrupt the
  # request or inject code.
  local processing_request
  processing_request=$(
    MLCC_JOB_NAME="${job_name}" \
    MLCC_INSTANCE_TYPE="${PROCESSING_JOB_INSTANCE_TYPE}" \
    MLCC_VOLUME_GB="${PROCESSING_JOB_VOLUME_GB}" \
    MLCC_IMAGE_URI="${container_image}" \
    MLCC_ENTRYPOINT_URI="${entrypoint_s3_uri}" \
    MLCC_ROLE_ARN="${execution_role}" \
    MLCC_MODEL_ID="${MODEL_NAME}" \
    MLCC_S3_OUTPUT_URI="${MODEL_S3_URI}" \
    MLCC_HF_TOKEN="${hf_token_value}" \
    python3 -c '
import json, os

e = os.environ
container_env = {
    "MODEL_ID": e["MLCC_MODEL_ID"],
    "S3_OUTPUT_URI": e["MLCC_S3_OUTPUT_URI"],
}
if e.get("MLCC_HF_TOKEN"):
    container_env["HF_TOKEN"] = e["MLCC_HF_TOKEN"]

job = {
    "ProcessingJobName": e["MLCC_JOB_NAME"],
    "ProcessingResources": {
        "ClusterConfig": {
            "InstanceCount": 1,
            "InstanceType": e["MLCC_INSTANCE_TYPE"],
            "VolumeSizeInGB": int(e["MLCC_VOLUME_GB"]),
        }
    },
    "AppSpecification": {
        "ImageUri": e["MLCC_IMAGE_URI"],
        "ContainerEntrypoint": ["bash", "-c"],
        "ContainerArguments": [
            "aws s3 cp " + e["MLCC_ENTRYPOINT_URI"] + " /tmp/entrypoint.sh"
            " && chmod +x /tmp/entrypoint.sh && /tmp/entrypoint.sh"
        ],
    },
    "Environment": container_env,
    "RoleArn": e["MLCC_ROLE_ARN"],
    "StoppingCondition": {"MaxRuntimeInSeconds": 86400},
}

print(json.dumps(job, indent=2))
'
  )

  # Write the request to a private, unpredictable temp file: it may contain
  # the HuggingFace token, so a guessable world-readable /tmp path is unsafe.
  local request_file
  request_file=$(mktemp "${TMPDIR:-/tmp}/mlcc-stage-request.XXXXXX")
  printf '%s\n' "${processing_request}" > "${request_file}"

  echo "🚀 Creating Processing Job: ${job_name}"
  echo ""

  local create_output
  local create_exit_code=0
  create_output=$(aws sagemaker create-processing-job \
    --cli-input-json "file://${request_file}" \
    --region "${AWS_REGION}" 2>&1) || create_exit_code=$?

  rm -f "${request_file}"

  if [ "${create_exit_code}" -ne 0 ]; then
    echo "❌ Failed to create Processing Job"
    echo " ${create_output}"
    echo ""
    if echo "${create_output}" | grep -q "AccessDeniedException"; then
      echo " Remediation: ensure the execution role has sagemaker:CreateProcessingJob permission"
    fi
    exit 1
  fi

  echo " ✅ Processing Job submitted: ${job_name}"
  echo ""

  # Handle --no-wait
  if [ "${NO_WAIT}" = true ]; then
    echo " --no-wait specified. Job submitted, exiting without polling."
    echo ""
    echo " Check status:"
    echo " aws sagemaker describe-processing-job --processing-job-name ${job_name} --region ${AWS_REGION}"
    echo ""
    echo " On completion, the staged model will be at:"
    echo " ${MODEL_S3_URI}"
    return 0
  fi

  # Poll for completion
  _poll_processing_job "${job_name}"
}
274
+
275
+ # ── Poll Processing Job status ────────────────────────────────────────────────
276
#######################################
# Poll a SageMaker Processing Job until it reaches a terminal state.
# Globals (read):    AWS_REGION, POLL_INTERVAL, MODEL_S3_URI, UPDATE_CONFIG,
#                    SCRIPT_DIR
# Globals (written): CONFIG_FILE (only when UPDATE_CONFIG=true and the job
#                    completed); MODEL_NAME in do/config is rewritten then
# Arguments: $1 - processing job name
# Outputs:   status lines and follow-up hints on stdout
# Returns:   0 when the job Completed, 1 when it Failed, 2 when it was Stopped
#######################################
_poll_processing_job() {
  local job_name="$1"
  local describe_output describe_exit_code
  local job_status failure_reason now

  echo "⏳ Polling Processing Job status (every ${POLL_INTERVAL}s)..."
  echo " (Ctrl+C to stop polling — job continues in background)"
  echo ""

  while true; do
    # Must be reset on every pass: `local` does not clear an already-set
    # variable, and the status is only assigned when the call fails. Without
    # the reset, one transient failure would make every later (successful)
    # describe look failed and the loop would retry forever.
    describe_exit_code=0
    describe_output=$(aws sagemaker describe-processing-job \
      --processing-job-name "${job_name}" \
      --region "${AWS_REGION}" 2>&1) || describe_exit_code=$?

    if [ "${describe_exit_code}" -ne 0 ]; then
      echo " ⚠️ Failed to describe job (will retry): ${describe_output}"
      sleep "${POLL_INTERVAL}"
      continue
    fi

    # Parse status and failure reason from the response; an unparsable
    # response is reported as "Unknown" and simply polled again.
    job_status=$(printf '%s\n' "${describe_output}" \
      | python3 -c 'import sys, json; print(json.load(sys.stdin).get("ProcessingJobStatus", "Unknown"))' \
        2>/dev/null) || job_status="Unknown"

    failure_reason=$(printf '%s\n' "${describe_output}" \
      | python3 -c 'import sys, json; print(json.load(sys.stdin).get("FailureReason", ""))' \
        2>/dev/null) || failure_reason=""

    # Print status
    now=$(date +%H:%M:%S)
    echo " [${now}] Status: ${job_status}"

    # Handle terminal states; any other status falls through to the sleep
    case "${job_status}" in
      Completed)
        echo ""
        echo "✅ Processing Job completed: ${job_name}"
        echo ""
        echo " S3 URI: ${MODEL_S3_URI}"
        echo ""
        if [ "${UPDATE_CONFIG}" = true ]; then
          CONFIG_FILE="${SCRIPT_DIR}/config"
          # -i.bak followed by rm works with both GNU and BSD sed
          sed -i.bak "s|^export MODEL_NAME=.*|export MODEL_NAME=\"${MODEL_S3_URI}\"|" "${CONFIG_FILE}"
          rm -f "${CONFIG_FILE}.bak"
          echo " ✅ Updated MODEL_NAME in do/config → ${MODEL_S3_URI}"
          echo ""
          echo " Re-deploy with S3-backed model: ./do/deploy"
        else
          echo " To use this staged model, update do/config:"
          echo " export MODEL_NAME=\"${MODEL_S3_URI}\""
          echo ""
          echo " Or re-run with --update-config:"
          echo " ./do/stage --submit --update-config"
        fi
        return 0
        ;;
      Failed)
        echo ""
        echo "❌ Processing Job failed: ${job_name}"
        if [ -n "${failure_reason}" ]; then
          echo " Reason: ${failure_reason}"
        fi
        echo ""
        echo " Check CloudWatch logs:"
        echo " /aws/sagemaker/ProcessingJobs/${job_name}"
        echo ""
        echo " To retry: ./do/stage --submit --force"
        return 1
        ;;
      Stopped)
        echo ""
        echo "⏹️ Processing Job was stopped: ${job_name}"
        echo ""
        echo " To retry: ./do/stage --submit --force"
        return 2
        ;;
    esac

    sleep "${POLL_INTERVAL}"
  done
}
366
+
367
+ # ── Check if model is already an S3 URI ──────────────────────────────────────
368
+ if [[ "${MODEL_NAME}" == s3://* ]]; then
369
+ echo "✅ Model is already an S3 URI: ${MODEL_NAME}"
370
+ echo " Nothing to stage."
371
+ exit 0
372
+ fi
373
+
374
+ echo "📦 Staging model: ${MODEL_NAME}"
375
+ echo " Project: ${PROJECT_NAME}"
376
+ echo ""
377
+
378
+ # ── Resolve profile for S3 bucket ────────────────────────────────────────────
379
+ _PROFILE_JSON=""
380
+ if command -v python3 &>/dev/null; then
381
+ _PROFILE_JSON=$(python3 -c "
382
+ import json, os
383
+ config_path = os.path.expanduser('~/.ml-container-creator/config.json')
384
+ try:
385
+ with open(config_path) as f:
386
+ config = json.load(f)
387
+ profile = config['profiles'][config['activeProfile']]
388
+ print(json.dumps(profile))
389
+ except:
390
+ print('{}')
391
+ " 2>/dev/null) || _PROFILE_JSON="{}"
392
+ fi
393
+
394
+ # Extract the benchmark S3 bucket from profile (used for model staging)
395
+ STAGE_S3_BUCKET=$(echo "${_PROFILE_JSON}" | python3 -c "
396
+ import sys, json
397
+ p = json.load(sys.stdin)
398
+ bucket = p.get('benchmarkS3Bucket', '')
399
+ if not bucket:
400
+ acct = p.get('accountId', 'unknown')
401
+ region = p.get('awsRegion', 'us-east-1')
402
+ bucket = f'ml-container-creator-benchmark-{region}-{acct}'
403
+ print(bucket)
404
+ " 2>/dev/null) || STAGE_S3_BUCKET=""
405
+
406
+ if [ -z "${STAGE_S3_BUCKET}" ]; then
407
+ echo "❌ Could not determine S3 bucket for staging."
408
+ echo " Run 'ml-container-creator bootstrap' to set up your profile."
409
+ exit 1
410
+ fi
411
+
412
+ # Target S3 path for staged model
413
+ MODEL_S3_URI="s3://${STAGE_S3_BUCKET}/models/${PROJECT_NAME}/"
414
+
415
+ echo " Target: ${MODEL_S3_URI}"
416
+ echo ""
417
+
418
+ # ── Submit mode: SageMaker Processing Job ─────────────────────────────────────
419
+ # For very large models (>500GB) that exceed local disk, submit a Processing Job
420
+ # with 2TB attached storage. The job downloads from HuggingFace and syncs to S3.
421
+ if [ "${SUBMIT_MODE}" = true ]; then
422
+ _submit_processing_job
423
+ exit $?
424
+ fi
425
+
426
+ # ── Idempotency: check if model is already staged ────────────────────────────
427
+ if [ "${FORCE}" = false ]; then
428
+ if aws s3 ls "${MODEL_S3_URI}config.json" --region "${AWS_REGION}" &>/dev/null; then
429
+ echo "✅ Model already staged at: ${MODEL_S3_URI}"
430
+ echo " Use --force to re-stage."
431
+ echo ""
432
+ if [ "${UPDATE_CONFIG}" = true ]; then
433
+ CONFIG_FILE="${SCRIPT_DIR}/config"
434
+ sed -i.bak "s|^export MODEL_NAME=.*|export MODEL_NAME=\"${MODEL_S3_URI}\"|" "${CONFIG_FILE}"
435
+ rm -f "${CONFIG_FILE}.bak"
436
+ echo " ✅ Updated MODEL_NAME in do/config → ${MODEL_S3_URI}"
437
+ else
438
+ echo " To use this staged model, set in do/config:"
439
+ echo " export MODEL_NAME=\"${MODEL_S3_URI}\""
440
+ fi
441
+ exit 0
442
+ fi
443
+ fi
444
+
445
+ # ── Validate prerequisites ───────────────────────────────────────────────────
446
+ if ! command -v huggingface-cli &>/dev/null; then
447
+ echo "❌ huggingface-cli is not installed"
448
+ echo " Install: pip install huggingface_hub[cli] hf_transfer"
449
+ exit 2
450
+ fi
451
+
452
+ if ! command -v aws &>/dev/null; then
453
+ echo "❌ AWS CLI is not installed"
454
+ echo " Install: https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html"
455
+ exit 2
456
+ fi
457
+
458
+ # Validate AWS credentials
459
+ if ! aws sts get-caller-identity &>/dev/null; then
460
+ echo "❌ AWS credentials not configured or expired."
461
+ echo " Run: aws configure"
462
+ exit 4
463
+ fi
464
+
465
+ # ── Resolve HuggingFace token (for gated models) ─────────────────────────────
466
+ if [ -n "${HF_TOKEN_ARN:-}" ] && [ -z "${HF_TOKEN:-}" ]; then
467
+ echo "🔐 Resolving HuggingFace token from Secrets Manager..."
468
+ HF_TOKEN=$(aws secretsmanager get-secret-value --secret-id "${HF_TOKEN_ARN}" --query SecretString --output text) || {
469
+ echo "⚠️ Failed to resolve HF token from Secrets Manager (continuing without token)"
470
+ HF_TOKEN=""
471
+ }
472
+ export HF_TOKEN
473
+ fi
474
+
475
+ # ── Download model from HuggingFace ──────────────────────────────────────────
476
+ echo "⬇️ Downloading model from HuggingFace: ${MODEL_NAME}"
477
+ echo " Using hf_transfer for fast parallel downloads..."
478
+ echo ""
479
+
480
+ # Enable fast parallel downloads via hf_transfer
481
+ export HF_HUB_ENABLE_HF_TRANSFER=1
482
+
483
+ # Download to HF cache (huggingface-cli manages cache location)
484
+ DOWNLOAD_ARGS=("${MODEL_NAME}")
485
+ if [ -n "${HF_TOKEN:-}" ]; then
486
+ DOWNLOAD_ARGS+=("--token" "${HF_TOKEN}")
487
+ fi
488
+
489
+ if ! huggingface-cli download "${DOWNLOAD_ARGS[@]}"; then
490
+ echo "❌ Failed to download model from HuggingFace: ${MODEL_NAME}"
491
+ echo ""
492
+ echo "Possible causes:"
493
+ echo " • Model name is incorrect"
494
+ echo " • Model is gated and requires HF_TOKEN"
495
+ echo " • Network connectivity issues"
496
+ exit 3
497
+ fi
498
+
499
+ echo ""
500
+ echo "✅ Download complete"
501
+
502
+ # ── Locate downloaded files in HF cache ───────────────────────────────────────
503
+ # huggingface-cli downloads to ~/.cache/huggingface/hub/models--<org>--<name>/snapshots/<rev>/
504
# Ask huggingface_hub where the snapshot that was just downloaded lives.
# The model name is passed as an argument rather than spliced into the Python
# source, so names containing quotes cannot break or inject into the snippet.
HF_CACHE_DIR=$(python3 -c '
import sys
from huggingface_hub import snapshot_download
print(snapshot_download(sys.argv[1], local_files_only=True))
' "${MODEL_NAME}" 2>/dev/null) || HF_CACHE_DIR=""

# Cache layout: <hub cache>/models--<org>--<name>/snapshots/<rev>/
# The hub cache honors HF_HUB_CACHE, then HF_HOME, then the default location.
HF_HUB_CACHE_ROOT="${HF_HUB_CACHE:-${HF_HOME:-${HOME}/.cache/huggingface}/hub}"
# Every "/" in the model id becomes "--" in the directory name. (tr cannot do
# this: it maps single characters, and GNU tr treats "--" as end-of-options.)
MODEL_DIR_NAME="${MODEL_NAME//\//--}"
HF_SNAPSHOTS_DIR="${HF_HUB_CACHE_ROOT}/models--${MODEL_DIR_NAME}/snapshots"

if [ -z "${HF_CACHE_DIR}" ] || [ ! -d "${HF_CACHE_DIR}" ]; then
  # Fallback: pick the most recently modified snapshot directory. A glob loop
  # replaces `ls -td … | head -1`, which mis-parses unusual names and, under
  # `set -e -o pipefail`, aborted the script before the error below was shown
  # whenever no snapshot existed.
  HF_CACHE_DIR=""
  for _snapshot in "${HF_SNAPSHOTS_DIR}"/*/; do
    [ -d "${_snapshot}" ] || continue
    if [ -z "${HF_CACHE_DIR}" ] || [ "${_snapshot}" -nt "${HF_CACHE_DIR}" ]; then
      HF_CACHE_DIR="${_snapshot}"
    fi
  done
fi

if [ -z "${HF_CACHE_DIR}" ] || [ ! -d "${HF_CACHE_DIR}" ]; then
  echo "❌ Could not locate downloaded model files in HuggingFace cache"
  echo " Expected location: ${HF_SNAPSHOTS_DIR}/"
  exit 3
fi
526
+
527
+ echo "📁 Model cache: ${HF_CACHE_DIR}"
528
+
529
+ # ── Sync to S3 ───────────────────────────────────────────────────────────────
530
+ echo ""
531
+ echo "☁️ Syncing model to S3: ${MODEL_S3_URI}"
532
+ echo " This may take a while for large models..."
533
+ echo ""
534
+
535
+ if ! aws s3 sync "${HF_CACHE_DIR}" "${MODEL_S3_URI}" \
536
+ --region "${AWS_REGION}" \
537
+ --no-progress \
538
+ --exclude "*.lock" \
539
+ --exclude ".gitattributes"; then
540
+ echo "❌ Failed to sync model to S3"
541
+ echo ""
542
+ echo "Possible causes:"
543
+ echo " • Missing S3 write permissions (s3:PutObject)"
544
+ echo " • Bucket does not exist (run 'ml-container-creator bootstrap')"
545
+ echo " • Network connectivity issues"
546
+ exit 4
547
+ fi
548
+
549
+ echo ""
550
+ echo "✅ Model staged successfully!"
551
+ echo ""
552
+ echo " S3 URI: ${MODEL_S3_URI}"
553
+ echo ""
554
+ if [ "${UPDATE_CONFIG}" = true ]; then
555
+ CONFIG_FILE="${SCRIPT_DIR}/config"
556
+ sed -i.bak "s|^export MODEL_NAME=.*|export MODEL_NAME=\"${MODEL_S3_URI}\"|" "${CONFIG_FILE}"
557
+ rm -f "${CONFIG_FILE}.bak"
558
+ echo " ✅ Updated MODEL_NAME in do/config → ${MODEL_S3_URI}"
559
+ echo ""
560
+ echo " Re-deploy with S3-backed model: ./do/deploy"
561
+ else
562
+ echo " To use this staged model, update do/config:"
563
+ echo " export MODEL_NAME=\"${MODEL_S3_URI}\""
564
+ echo ""
565
+ echo " Or re-run with --update-config to do it automatically:"
566
+ echo " ./do/stage --update-config"
567
+ echo ""
568
+ echo " Then re-deploy: ./do/deploy"
569
+ fi
@@ -9,6 +9,16 @@ set -o pipefail
9
9
  # Source configuration
10
10
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
11
11
  source "${SCRIPT_DIR}/config"
12
+ source "${SCRIPT_DIR}/lib/profile.sh"
13
+
14
+ # ── Profile-resolved variables (env var > profile > default) ──────────────────
15
+ # Disable unbound-variable checking for associative array access (bash 3.2 compat)
16
+ set +u
17
+ ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
18
+ set -u
19
+
20
+ # ── Derived variables (env var > computed default) ────────────────────────────
21
+ CODEBUILD_PROJECT_NAME="${CODEBUILD_PROJECT_NAME:-${PROJECT_NAME}-build-$(date +%Y%m%d)}"
12
22
 
13
23
  echo "🚀 Submitting CodeBuild job for ${PROJECT_NAME}"
14
24
  echo " Deployment config: ${DEPLOYMENT_CONFIG}"
package/templates/do/test CHANGED
@@ -9,6 +9,7 @@ set -o pipefail
9
9
  # Source configuration
10
10
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
11
11
  source "${SCRIPT_DIR}/config"
12
+ source "${SCRIPT_DIR}/lib/profile.sh"
12
13
 
13
14
  <% if (deploymentTarget === 'realtime-inference') { %>
14
15
  # ============================================================
package/templates/do/tune CHANGED
@@ -13,6 +13,13 @@ set -o pipefail
13
13
  # ── Source project configuration ──────────────────────────────────────────────
14
14
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
15
15
  source "${SCRIPT_DIR}/config"
16
+ source "${SCRIPT_DIR}/lib/profile.sh"
17
+
18
+ # ── Profile-resolved variables (env var > profile > default) ──────────────────
19
+ # Disable unbound-variable checking for associative array access (bash 3.2 compat)
20
+ set +u
21
+ TUNE_S3_BUCKET="${TUNE_S3_BUCKET:-mlcc-tune-${_PROFILE[accountId]:-unknown}-${_PROFILE[awsRegion]:-us-east-1}}"
22
+ set -u
16
23
 
17
24
  # ── Constants ─────────────────────────────────────────────────────────────────
18
25
  CATALOG_FILE="${SCRIPT_DIR}/.tune_catalog.json"