@aws/ml-container-creator 0.12.1 → 0.13.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/package.json +11 -3
  2. package/servers/instance-sizer/lib/model-resolver.js +127 -185
  3. package/servers/instance-sizer/lib/vram-estimator.js +86 -0
  4. package/servers/lib/catalogs/instances.json +0 -27
  5. package/src/app.js +14 -0
  6. package/src/lib/bootstrap-command-handler.js +2 -2
  7. package/src/lib/generated/cli-options.js +1 -1
  8. package/src/lib/generated/parameter-matrix.js +1 -1
  9. package/src/lib/generated/validation-rules.js +1 -1
  10. package/src/lib/prompt-runner.js +14 -31
  11. package/src/lib/prove-pipeline-executor.js +294 -0
  12. package/templates/IAM_PERMISSIONS.md +64 -13
  13. package/templates/do/.tune_helper.py +5 -2
  14. package/templates/do/README.md +50 -567
  15. package/templates/do/adapter +1 -1
  16. package/templates/do/build +2 -2
  17. package/templates/do/clean.d/async-inference.ejs +2 -2
  18. package/templates/do/clean.d/batch-transform.ejs +2 -2
  19. package/templates/do/clean.d/hyperpod-eks.ejs +2 -2
  20. package/templates/do/clean.d/managed-inference.ejs +2 -2
  21. package/templates/do/deploy.d/async-inference.ejs +6 -6
  22. package/templates/do/deploy.d/batch-transform.ejs +4 -4
  23. package/templates/do/deploy.d/hyperpod-eks.ejs +1 -1
  24. package/templates/do/deploy.d/managed-inference.ejs +15 -3
  25. package/templates/do/lib/profile.sh +19 -15
  26. package/templates/do/lib/staged-assets.sh +217 -0
  27. package/templates/do/push +2 -2
  28. package/templates/do/register +2 -2
  29. package/templates/do/stage +38 -33
  30. package/templates/do/submit +1 -1
  31. package/templates/do/tune +1 -1
  32. package/templates/MIGRATION.md +0 -488
  33. package/templates/TEMPLATE_SYSTEM.md +0 -243
  34. package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
@@ -41,15 +41,15 @@ source "${SCRIPT_DIR}/config"
41
41
  source "${SCRIPT_DIR}/lib/profile.sh"
42
42
 
43
43
  # ── Profile-resolved variables (env var > profile > default) ──────────────────
44
- ROLE_ARN="${ROLE_ARN:-${_PROFILE[roleArn]:-}}"
45
- ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
46
- export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
44
+ ROLE_ARN="${ROLE_ARN:-${_PROFILE_roleArn:-}}"
45
+ ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE_ecrRepositoryName:-ml-container-creator}}"
46
+ export AWS_REGION="${AWS_REGION:-${_PROFILE_awsRegion:-us-east-1}}"
47
47
 
48
48
  # Async-specific derived variables
49
- _ASYNC_BUCKET="${_PROFILE[asyncS3Bucket]:-mlcc-async-${_PROFILE[accountId]:-unknown}-${_PROFILE[awsRegion]:-us-east-1}}"
49
+ _ASYNC_BUCKET="${_PROFILE_asyncS3Bucket:-mlcc-async-${_PROFILE_accountId:-unknown}-${_PROFILE_awsRegion:-us-east-1}}"
50
50
  ASYNC_S3_OUTPUT_PATH="${ASYNC_S3_OUTPUT_PATH:-s3://${_ASYNC_BUCKET}/${PROJECT_NAME}/output/}"
51
- ASYNC_SNS_SUCCESS_TOPIC="${ASYNC_SNS_SUCCESS_TOPIC:-arn:aws:sns:${_PROFILE[awsRegion]:-us-east-1}:${_PROFILE[accountId]:-unknown}:ml-container-creator-${PROJECT_NAME}-async-success}"
52
- ASYNC_SNS_ERROR_TOPIC="${ASYNC_SNS_ERROR_TOPIC:-arn:aws:sns:${_PROFILE[awsRegion]:-us-east-1}:${_PROFILE[accountId]:-unknown}:ml-container-creator-${PROJECT_NAME}-async-error}"
51
+ ASYNC_SNS_SUCCESS_TOPIC="${ASYNC_SNS_SUCCESS_TOPIC:-arn:aws:sns:${_PROFILE_awsRegion:-us-east-1}:${_PROFILE_accountId:-unknown}:ml-container-creator-${PROJECT_NAME}-async-success}"
52
+ ASYNC_SNS_ERROR_TOPIC="${ASYNC_SNS_ERROR_TOPIC:-arn:aws:sns:${_PROFILE_awsRegion:-us-east-1}:${_PROFILE_accountId:-unknown}:ml-container-creator-${PROJECT_NAME}-async-error}"
53
53
 
54
54
  echo "🚀 Deploying to AWS"
55
55
  echo " Project: ${PROJECT_NAME}"
@@ -41,12 +41,12 @@ source "${SCRIPT_DIR}/config"
41
41
  source "${SCRIPT_DIR}/lib/profile.sh"
42
42
 
43
43
  # ── Profile-resolved variables (env var > profile > default) ──────────────────
44
- ROLE_ARN="${ROLE_ARN:-${_PROFILE[roleArn]:-}}"
45
- ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
46
- export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
44
+ ROLE_ARN="${ROLE_ARN:-${_PROFILE_roleArn:-}}"
45
+ ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE_ecrRepositoryName:-ml-container-creator}}"
46
+ export AWS_REGION="${AWS_REGION:-${_PROFILE_awsRegion:-us-east-1}}"
47
47
 
48
48
  # Batch-specific derived variables
49
- _BATCH_BUCKET="${_PROFILE[batchS3Bucket]:-mlcc-batch-${_PROFILE[accountId]:-unknown}-${_PROFILE[awsRegion]:-us-east-1}}"
49
+ _BATCH_BUCKET="${_PROFILE_batchS3Bucket:-mlcc-batch-${_PROFILE_accountId:-unknown}-${_PROFILE_awsRegion:-us-east-1}}"
50
50
  BATCH_INPUT_PATH="${BATCH_INPUT_PATH:-s3://${_BATCH_BUCKET}/${PROJECT_NAME}/input/}"
51
51
  BATCH_OUTPUT_PATH="${BATCH_OUTPUT_PATH:-s3://${_BATCH_BUCKET}/${PROJECT_NAME}/output/}"
52
52
 
@@ -41,7 +41,7 @@ source "${SCRIPT_DIR}/config"
41
41
  source "${SCRIPT_DIR}/lib/profile.sh"
42
42
 
43
43
  # ── Profile-resolved variables (env var > profile > default) ──────────────────
44
- export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
44
+ export AWS_REGION="${AWS_REGION:-${_PROFILE_awsRegion:-us-east-1}}"
45
45
 
46
46
  echo "🚀 Deploying to AWS"
47
47
  echo " Project: ${PROJECT_NAME}"
@@ -214,9 +214,9 @@ source "${SCRIPT_DIR}/config"
214
214
  source "${SCRIPT_DIR}/lib/profile.sh"
215
215
 
216
216
  # ── Profile-resolved variables (env var > profile > default) ──────────────────
217
- ROLE_ARN="${ROLE_ARN:-${_PROFILE[roleArn]:-}}"
218
- ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
219
- export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
217
+ ROLE_ARN="${ROLE_ARN:-${_PROFILE_roleArn:-}}"
218
+ ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE_ecrRepositoryName:-ml-container-creator}}"
219
+ export AWS_REGION="${AWS_REGION:-${_PROFILE_awsRegion:-us-east-1}}"
220
220
 
221
221
  echo "🚀 Deploying to AWS"
222
222
  echo " Project: ${PROJECT_NAME}"
@@ -329,6 +329,18 @@ if [ -z "${ROLE_ARN:-}" ]; then
329
329
  exit 3
330
330
  fi
331
331
 
332
+ # Validate ROLE_ARN looks like an IAM role ARN
333
+ if ! echo "${ROLE_ARN}" | grep -qE '^arn:aws[a-z-]*:iam::[0-9]{12}:role/.+'; then
334
+ echo "❌ ROLE_ARN is not a valid IAM role ARN:"
335
+ echo " Got: ${ROLE_ARN}"
336
+ echo " Expected format: arn:aws:iam::123456789012:role/RoleName"
337
+ echo ""
338
+ echo " This may indicate a misconfigured bootstrap profile."
339
+ echo " Check ~/.ml-container-creator/config.json 'roleArn' field,"
340
+ echo " or set the correct value: export ROLE_ARN=arn:aws:iam::ACCOUNT:role/YOUR_ROLE"
341
+ exit 3
342
+ fi
343
+
332
344
  echo " Using execution role: ${ROLE_ARN}"
333
345
 
334
346
  # Validate --ic argument if specified (set by --ic <name> or --force-ic <name>)
@@ -1,21 +1,27 @@
#!/usr/bin/env bash
# Profile loader — reads the active bootstrap profile into _PROFILE_<key> variables.
# Source this file after do/config. Values provide defaults; explicit env vars
# take precedence (callers write "${VAR:-${_PROFILE_key:-default}}").
#
# Works on bash 3.2+ (macOS default) and bash 4+/5+; no associative arrays.
#
# After sourcing, access values via:
#   ${_PROFILE_roleArn:-}
#   ${_PROFILE_ecrRepositoryName:-ml-container-creator}
#   ${_PROFILE_awsRegion:-us-east-1}
#   ${_PROFILE_accountId:-}
#   ${_PROFILE_benchmarkS3Bucket:-}
#   ${_PROFILE_asyncS3Bucket:-}
#   ${_PROFILE_batchS3Bucket:-}
#
# Expected keys (set as _PROFILE_<key>):
#   awsRegion, accountId, awsProfile, roleArn, ecrRepositoryName,
#   benchmarkS3Bucket, ciBenchmarkResultsBucket, asyncS3Bucket, batchS3Bucket,
#   ciTableName, ciInfraProvisioned

# Disable unbound variable checking: profile keys may be absent and callers
# read them with ${_PROFILE_x:-} defaults.
set +u 2>/dev/null || true

# _profile_load()
#   Read ~/.ml-container-creator/config.json with python3, select
#   profiles[activeProfile], and define one _PROFILE_<key> shell variable per
#   scalar (str/int/float/bool) entry.
#
#   Best-effort: a missing python3, missing/invalid config file, or missing
#   profile leaves no _PROFILE_* variables defined and is not an error.
#
# Globals:   _PROFILE_RAW (written), _PROFILE_<key> (written)
# Arguments: none
# Returns:   0 when there is nothing to load; otherwise the status of eval
_profile_load() {
    _PROFILE_RAW=""
    command -v python3 >/dev/null 2>&1 || return 0

    # The Python program is single-quoted for the shell, so it must not contain
    # single quotes itself.
    _PROFILE_RAW=$(python3 -c '
import json, os, re, shlex
try:
    with open(os.path.expanduser("~/.ml-container-creator/config.json")) as f:
        cfg = json.load(f)
    profile = cfg["profiles"][cfg["activeProfile"]]
    for key, value in profile.items():
        if not isinstance(value, (str, int, float, bool)):
            continue
        # ASCII identifier characters only: str.isalnum() also accepts
        # non-ASCII letters, which are not valid in shell variable names.
        if not re.fullmatch(r"[A-Za-z0-9_]+", key):
            continue
        # shlex.quote makes the value inert under eval: quotes, dollar signs
        # and backticks in the profile are kept as literal text.
        print("_PROFILE_{}={}".format(key, shlex.quote(str(value))))
except Exception:
    pass
' 2>/dev/null) || _PROFILE_RAW=""

    if [ -n "${_PROFILE_RAW}" ]; then
        # Every line is _PROFILE_<validated key>=<shell-quoted value>.
        eval "${_PROFILE_RAW}"
    fi
}

_profile_load

# NOTE: set -u is NOT re-enabled here. The caller is responsible for managing
# their own shell options.
@@ -0,0 +1,217 @@
1
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Shared helper: read/write the .mlcc/staged-assets.json tracking file.
5
+ # Sourced by do/stage, do/submit, and other lifecycle scripts.
6
+ #
7
+ # ─── Schema (.mlcc/staged-assets.json) ───────────────────────────────────────
8
+ #
9
+ # {
10
+ # "version": "1",
11
+ # "models": {
12
+ # "<ic-name>": {
13
+ # "source": "<HuggingFace model ID, e.g. google/gemma-4-31B-it>",
14
+ # "staged_uri": "<S3 URI with trailing slash>",
15
+ # "staged_at": "<ISO 8601 timestamp>",
16
+ # "region": "<AWS region where the model was staged>",
17
+ # "size_gb": <numeric size in GB>
18
+ # }
19
+ # },
20
+ # "adapters": {}
21
+ # }
22
+ #
23
+ # Notes:
24
+ # - "version" is for forward-compatible schema evolution (start at "1")
25
+ # - "models" is keyed by IC name; use "default" for single-model projects
26
+ # - "adapters" is reserved for future LoRA adapter staging (BL-122)
27
+ # - This file is git-ignored (.mlcc/ contains account-specific URIs)
28
+ # - The file SHALL NOT be created unless a valid staging operation completes
29
+ # ──────────────────────────────────────────────────────────────────────────────
30
+
31
+ # Path to the staged-assets file (relative to project root)
32
+ STAGED_ASSETS_DIR=".mlcc"
33
+ STAGED_ASSETS_FILE="${STAGED_ASSETS_DIR}/staged-assets.json"
34
+
# _staged_assets_has_jq()
#   Report whether a jq command can be resolved by the shell.
#
# Arguments: none
# Returns:   0 if jq is available, non-zero if not
_staged_assets_has_jq() {
    command -v jq >/dev/null 2>&1
}
41
+
# _staged_assets_warn_no_jq()
#   Emit the "jq not found" notice on stderr, at most once per shell.
#   _STAGED_ASSETS_JQ_WARNED records that the notice has been shown.
#
# Arguments: none
# Returns:   0
_staged_assets_warn_no_jq() {
    # Notice already shown in this shell — nothing to do.
    [ -n "${_STAGED_ASSETS_JQ_WARNED:-}" ] && return 0

    printf '%s\n' "⚠️ jq not found — using fallback parser (install jq for full functionality)" >&2
    _STAGED_ASSETS_JQ_WARNED=1
}
50
+
# staged_assets_read_model_uri()
#   Print the staged S3 URI recorded for the "default" model in
#   ${STAGED_ASSETS_FILE}, or an empty line when none is recorded.
#
#   jq is used when available. Without jq, a grep/sed fallback scans the
#   5 lines following a line containing "default" for a "staged_uri" key
#   whose value is a quoted string, and prints the first such value.
#
# Arguments: none
# Output:    S3 URI string (stdout) or an empty string
# Returns:   0
staged_assets_read_model_uri() {
    local uri=""

    # No tracking file yet → nothing has been staged.
    if [ ! -f "${STAGED_ASSETS_FILE}" ]; then
        echo ""
        return 0
    fi

    if _staged_assets_has_jq; then
        uri=$(jq -r '.models.default.staged_uri // empty' "${STAGED_ASSETS_FILE}" 2>/dev/null) || uri=""
    else
        _staged_assets_warn_no_jq
        # sed -n ... p prints only lines where the substitution matched, so a
        # "staged_uri" that is null or otherwise not a quoted string yields an
        # empty result (as the jq branch does) instead of the raw JSON line.
        uri=$(grep -A 5 '"default"' "${STAGED_ASSETS_FILE}" 2>/dev/null \
            | sed -n 's/.*"staged_uri"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/p' \
            | head -1) || uri=""
    fi

    echo "${uri}"
}
82
+
# _staged_assets_json_escape()
#   Escape backslashes and double quotes so a value can be embedded in a
#   JSON string literal. Control characters are not handled.
#
# Arguments: $1 - raw string
# Output:    escaped string (stdout)
_staged_assets_json_escape() {
    printf '%s' "$1" | sed -e 's/\\/\\\\/g' -e 's/"/\\"/g'
}

# staged_assets_write_model()
#   Record staging information for the "default" model in
#   ${STAGED_ASSETS_FILE}, creating ${STAGED_ASSETS_DIR} if needed.
#
#   With jq, an existing file is updated in place (other keys are kept) and a
#   missing file is created with "version", "models" and "adapters".
#   Without jq, the file is rewritten from scratch (no merge support).
#
#   The new content is written to "${STAGED_ASSETS_FILE}.tmp" and moved over
#   the target only on success, so a failed write never leaves an empty or
#   partial staged-assets file behind.
#
# Arguments:
#   $1 - source:  HuggingFace model ID (e.g. "google/gemma-4-31B-it")
#   $2 - uri:     S3 URI where the model was staged (with trailing slash)
#   $3 - region:  AWS region where the model was staged
#   $4 - size_gb: Total size of the staged model in GB (JSON number)
# Returns:
#   0 on success; non-zero if size_gb is not a number or the file could not
#   be written (the existing file, if any, is left untouched).
staged_assets_write_model() {
    local source="$1"
    local uri="$2"
    local region="$3"
    local size_gb="$4"
    local timestamp
    local tmp_file="${STAGED_ASSETS_FILE}.tmp"
    local rc=0
    # JSON number grammar; kept in a variable so the regex works on bash 3.2.
    local json_number='^-?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][-+]?[0-9]+)?$'

    # size_gb is emitted unquoted, so anything but a number would either make
    # jq --argjson fail or produce invalid JSON in the fallback writer.
    if ! [[ "${size_gb}" =~ ${json_number} ]]; then
        echo "❌ staged_assets_write_model: size_gb must be a number (got: '${size_gb}')" >&2
        return 1
    fi

    timestamp=$(date -u +%Y-%m-%dT%H:%M:%SZ)

    # Ensure the .mlcc directory exists
    mkdir -p "${STAGED_ASSETS_DIR}" || return 1

    if _staged_assets_has_jq; then
        local -a entry_args
        entry_args=(
            --arg source "${source}"
            --arg uri "${uri}"
            --arg ts "${timestamp}"
            --arg region "${region}"
            --argjson size "${size_gb}"
        )
        local entry_filter='.models.default = {
            "source": $source,
            "staged_uri": $uri,
            "staged_at": $ts,
            "region": $region,
            "size_gb": $size
        }'

        if [ -f "${STAGED_ASSETS_FILE}" ]; then
            # Update existing file — merge the new model entry
            jq "${entry_args[@]}" "${entry_filter}" \
                "${STAGED_ASSETS_FILE}" > "${tmp_file}" || rc=$?
        else
            # Create new file: start from the empty skeleton, then add the entry
            jq -n "${entry_args[@]}" \
                '{"version": "1", "models": {}, "adapters": {}} | '"${entry_filter}" \
                > "${tmp_file}" || rc=$?
        fi
    else
        _staged_assets_warn_no_jq
        # Fallback: write the JSON directly (create-only, no merge support without jq)
        local esc_source esc_uri esc_region
        esc_source=$(_staged_assets_json_escape "${source}")
        esc_uri=$(_staged_assets_json_escape "${uri}")
        esc_region=$(_staged_assets_json_escape "${region}")
        cat > "${tmp_file}" << EOF || rc=$?
{
  "version": "1",
  "models": {
    "default": {
      "source": "${esc_source}",
      "staged_uri": "${esc_uri}",
      "staged_at": "${timestamp}",
      "region": "${esc_region}",
      "size_gb": ${size_gb}
    }
  },
  "adapters": {}
}
EOF
    fi

    if [ "${rc}" -ne 0 ]; then
        rm -f "${tmp_file}"
        echo "❌ staged_assets_write_model: failed to write ${STAGED_ASSETS_FILE}" >&2
        return "${rc}"
    fi

    mv "${tmp_file}" "${STAGED_ASSETS_FILE}"
}
160
+
# staged_assets_status()
#   Print a human-readable summary of ${STAGED_ASSETS_FILE}.
#
#   With jq: a table of models (IC name, source, S3 URI, region, size) and a
#   list of adapters, each section shown only when it has entries.
#   Without jq: the raw file contents.
#
# Arguments: none
# Output:    formatted report on stdout
# Returns:   0
staged_assets_status() {
    if [ ! -f "${STAGED_ASSETS_FILE}" ]; then
        echo "No staged assets found."
        echo " Run do/stage to stage model weights to S3."
        return 0
    fi

    echo "Staged Assets (.mlcc/staged-assets.json)"
    echo "─────────────────────────────────────────────────────────────────"

    if ! _staged_assets_has_jq; then
        _staged_assets_warn_no_jq
        # Fallback: basic display without jq
        echo ""
        echo " Raw contents:"
        echo ""
        cat "${STAGED_ASSETS_FILE}"
        echo ""
        return 0
    fi

    local model_count adapter_count
    # Unit Separator as field delimiter: unlike tab it is not IFS whitespace,
    # so empty fields are preserved by read instead of collapsing.
    local sep=$'\037'

    # Print models section
    model_count=$(jq -r '.models | length' "${STAGED_ASSETS_FILE}" 2>/dev/null) || model_count=0
    # jq prints nothing (exit 0) for an empty file; treat any non-integer
    # result as "no entries" so the numeric test below cannot error out.
    case "${model_count}" in
        '' | *[!0-9]*) model_count=0 ;;
    esac

    if [ "${model_count}" -gt 0 ]; then
        echo ""
        echo " Models:"
        echo " ┌──────────────┬─────────────────────────────────┬──────────────────────────────────────────────────────┬────────────┬─────────┐"
        printf " │ %-12s │ %-31s │ %-52s │ %-10s │ %-7s │\n" "IC Name" "Source" "S3 URI" "Region" "Size"
        echo " ├──────────────┼─────────────────────────────────┼──────────────────────────────────────────────────────┼────────────┼─────────┤"

        jq -r '.models | to_entries[]
               | [.key, .value.source, .value.staged_uri, .value.region, .value.size_gb]
               | map(tostring) | join("\u001f")' "${STAGED_ASSETS_FILE}" 2>/dev/null | \
        while IFS="${sep}" read -r row_ic row_source row_uri row_region row_size; do
            printf " │ %-12s │ %-31s │ %-52s │ %-10s │ %5s GB│\n" \
                "${row_ic}" "${row_source}" "${row_uri}" "${row_region}" "${row_size}"
        done

        echo " └──────────────┴─────────────────────────────────┴──────────────────────────────────────────────────────┴────────────┴─────────┘"
    fi

    # Print adapters section (future — shown only when non-empty)
    adapter_count=$(jq -r '.adapters | length' "${STAGED_ASSETS_FILE}" 2>/dev/null) || adapter_count=0
    case "${adapter_count}" in
        '' | *[!0-9]*) adapter_count=0 ;;
    esac

    if [ "${adapter_count}" -gt 0 ]; then
        echo ""
        echo " Adapters:"
        jq -r '.adapters | to_entries[] | " \(.key): \(.value.staged_uri // "not staged")"' "${STAGED_ASSETS_FILE}" 2>/dev/null
    fi

    echo ""
}
package/templates/do/push CHANGED
@@ -12,8 +12,8 @@ source "${SCRIPT_DIR}/config"
12
12
  source "${SCRIPT_DIR}/lib/profile.sh"
13
13
 
14
14
  # ── Profile-resolved variables (env var > profile > default) ──────────────────
15
- ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
16
- export AWS_REGION="${AWS_REGION:-${_PROFILE[awsRegion]:-us-east-1}}"
15
+ ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE_ecrRepositoryName:-ml-container-creator}}"
16
+ export AWS_REGION="${AWS_REGION:-${_PROFILE_awsRegion:-us-east-1}}"
17
17
 
18
18
  echo "🚀 Pushing Docker image to Amazon ECR"
19
19
  echo " Project: ${PROJECT_NAME}"
@@ -12,8 +12,8 @@ source "${SCRIPT_DIR}/config"
12
12
  source "${SCRIPT_DIR}/lib/profile.sh"
13
13
 
14
14
  # ── Profile-resolved variables (env var > profile > default) ──────────────────
15
- ROLE_ARN="${ROLE_ARN:-${_PROFILE[roleArn]:-}}"
16
- ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
15
+ ROLE_ARN="${ROLE_ARN:-${_PROFILE_roleArn:-}}"
16
+ ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE_ecrRepositoryName:-ml-container-creator}}"
17
17
 
18
18
  # ============================================================
19
19
  # Register deployment to the deployment registry
@@ -23,40 +23,42 @@ set -o pipefail
23
23
  # ── Source project configuration ──────────────────────────────────────────────
24
24
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
25
25
  source "${SCRIPT_DIR}/config"
26
+ source "${SCRIPT_DIR}/lib/profile.sh"
27
+ source "${SCRIPT_DIR}/lib/staged-assets.sh"
26
28
 
27
29
  # ── Parse flags ───────────────────────────────────────────────────────────────
28
30
  FORCE=false
29
- UPDATE_CONFIG=false
31
+ UPDATE_CONFIG=true
30
32
  SUBMIT_MODE=false
31
33
  NO_WAIT=false
32
34
  while [ $# -gt 0 ]; do
33
35
  case "$1" in
34
36
  --force) FORCE=true; shift ;;
35
- --update-config) UPDATE_CONFIG=true; shift ;;
37
+ --update-config) UPDATE_CONFIG=true; shift ;; # default, kept for backward compat
38
+ --no-update-config) UPDATE_CONFIG=false; shift ;;
36
39
  --submit) SUBMIT_MODE=true; shift ;;
37
40
  --no-wait) NO_WAIT=true; shift ;;
38
41
  --help|-h)
39
- echo "Usage: ./do/stage [--force] [--update-config] [--submit] [--no-wait]"
42
+ echo "Usage: ./do/stage [--force] [--no-update-config] [--submit] [--no-wait]"
40
43
  echo ""
41
44
  echo "Pre-stage model weights from HuggingFace to S3."
45
+ echo "On success, updates MODEL_NAME in do/config so subsequent tasks"
46
+ echo "(submit, deploy) pull from S3 with HuggingFace as fallback."
42
47
  echo ""
43
48
  echo "Modes:"
44
49
  echo " (default) Download locally then sync to S3"
45
50
  echo " --submit Submit as SageMaker Processing Job (for models >500GB)"
46
51
  echo ""
47
52
  echo "Options:"
48
- echo " --force Re-stage even if model already exists in S3"
49
- echo " --update-config Update MODEL_NAME in do/config to the staged S3 URI"
50
- echo " --no-wait (with --submit) Exit without polling for completion"
53
+ echo " --force Re-stage even if model already exists in S3"
54
+ echo " --no-update-config Do NOT update MODEL_NAME in do/config after staging"
55
+ echo " --no-wait (with --submit) Exit without polling for completion"
51
56
  echo ""
52
57
  echo "Environment:"
53
58
  echo " HF_TOKEN HuggingFace token (for gated models)"
54
59
  echo ""
55
60
  echo "The staged S3 URI will be printed on completion."
56
- echo "Pass --update-config to automatically update do/config for S3-backed deploys."
57
- echo ""
58
- echo "The --submit mode uses a SageMaker Processing Job with 2TB attached"
59
- echo "storage, suitable for very large models that exceed local disk capacity."
61
+ echo "MODEL_NAME in do/config is updated automatically unless --no-update-config is passed."
60
62
  exit 0
61
63
  ;;
62
64
  *) shift ;;
@@ -126,11 +128,16 @@ echo "Target: ${S3_OUTPUT_URI}"
126
128
  echo ""
127
129
 
128
130
  # Install dependencies
129
- echo "📦 Installing huggingface-cli and hf_transfer..."
130
- pip install -q huggingface_hub[cli] hf_transfer
131
+ echo "📦 Checking huggingface-cli and hf_transfer..."
132
+ pip install -q huggingface_hub[cli] hf_transfer 2>/dev/null || true
131
133
 
132
- # Enable fast parallel downloads
133
- export HF_HUB_ENABLE_HF_TRANSFER=1
134
+ # Enable fast parallel downloads only if hf_transfer is available
135
+ if python3 -c "import hf_transfer" 2>/dev/null; then
136
+ export HF_HUB_ENABLE_HF_TRANSFER=1
137
+ else
138
+ echo " ℹ️ hf_transfer not available — using standard download (install with: pip install hf_transfer)"
139
+ unset HF_HUB_ENABLE_HF_TRANSFER 2>/dev/null || true
140
+ fi
134
141
 
135
142
  # Set HF token if provided
136
143
  if [ -n "${HF_TOKEN:-}" ]; then
@@ -324,15 +331,11 @@ print(d.get('FailureReason', ''))
324
331
  CONFIG_FILE="${SCRIPT_DIR}/config"
325
332
  sed -i.bak "s|^export MODEL_NAME=.*|export MODEL_NAME=\"${MODEL_S3_URI}\"|" "${CONFIG_FILE}"
326
333
  rm -f "${CONFIG_FILE}.bak"
327
- echo " ✅ Updated MODEL_NAME in do/config → ${MODEL_S3_URI}"
328
- echo ""
329
- echo " Re-deploy with S3-backed model: ./do/deploy"
334
+ echo " ✅ Updated MODEL_NAME in do/config → S3-backed"
335
+ echo " Subsequent tasks (submit, deploy) will pull from S3."
330
336
  else
331
337
  echo " To use this staged model, update do/config:"
332
338
  echo " export MODEL_NAME=\"${MODEL_S3_URI}\""
333
- echo ""
334
- echo " Or re-run with --update-config:"
335
- echo " ./do/stage --submit --update-config"
336
339
  fi
337
340
  return 0
338
341
  ;;
@@ -431,7 +434,7 @@ if [ "${FORCE}" = false ]; then
431
434
  CONFIG_FILE="${SCRIPT_DIR}/config"
432
435
  sed -i.bak "s|^export MODEL_NAME=.*|export MODEL_NAME=\"${MODEL_S3_URI}\"|" "${CONFIG_FILE}"
433
436
  rm -f "${CONFIG_FILE}.bak"
434
- echo " ✅ Updated MODEL_NAME in do/config → ${MODEL_S3_URI}"
437
+ echo " ✅ Updated MODEL_NAME in do/config → S3-backed"
435
438
  else
436
439
  echo " To use this staged model, set in do/config:"
437
440
  echo " export MODEL_NAME=\"${MODEL_S3_URI}\""
@@ -472,13 +475,21 @@ fi
472
475
 
473
476
  # ── Download model from HuggingFace ──────────────────────────────────────────
474
477
  echo "⬇️ Downloading model from HuggingFace: ${MODEL_NAME}"
475
- echo " Using hf_transfer for fast parallel downloads..."
478
+ if python3 -c "import hf_transfer" 2>/dev/null; then
479
+ echo " Using hf_transfer for fast parallel downloads..."
480
+ else
481
+ echo " Using standard downloads (install hf_transfer for faster staging)..."
482
+ fi
476
483
  echo ""
477
484
 
478
- # Enable fast parallel downloads via hf_transfer
479
- export HF_HUB_ENABLE_HF_TRANSFER=1
485
+ # Enable fast parallel downloads via hf_transfer (if available)
486
+ if python3 -c "import hf_transfer" 2>/dev/null; then
487
+ export HF_HUB_ENABLE_HF_TRANSFER=1
488
+ else
489
+ unset HF_HUB_ENABLE_HF_TRANSFER 2>/dev/null || true
490
+ fi
480
491
 
481
- # Download to HF cache (huggingface-cli manages cache location)
492
+ # Download to HF cache
482
493
  DOWNLOAD_ARGS=("${MODEL_NAME}")
483
494
  if [ -n "${HF_TOKEN:-}" ]; then
484
495
  DOWNLOAD_ARGS+=("--token" "${HF_TOKEN}")
@@ -553,15 +564,9 @@ if [ "${UPDATE_CONFIG}" = true ]; then
553
564
  CONFIG_FILE="${SCRIPT_DIR}/config"
554
565
  sed -i.bak "s|^export MODEL_NAME=.*|export MODEL_NAME=\"${MODEL_S3_URI}\"|" "${CONFIG_FILE}"
555
566
  rm -f "${CONFIG_FILE}.bak"
556
- echo " ✅ Updated MODEL_NAME in do/config → ${MODEL_S3_URI}"
557
- echo ""
558
- echo " Re-deploy with S3-backed model: ./do/deploy"
567
+ echo " ✅ Updated MODEL_NAME in do/config → S3-backed"
568
+ echo " Subsequent tasks (submit, deploy) will pull from S3."
559
569
  else
560
570
  echo " To use this staged model, update do/config:"
561
571
  echo " export MODEL_NAME=\"${MODEL_S3_URI}\""
562
- echo ""
563
- echo " Or re-run with --update-config to do it automatically:"
564
- echo " ./do/stage --update-config"
565
- echo ""
566
- echo " Then re-deploy: ./do/deploy"
567
572
  fi
@@ -12,7 +12,7 @@ source "${SCRIPT_DIR}/config"
12
12
  source "${SCRIPT_DIR}/lib/profile.sh"
13
13
 
14
14
  # ── Profile-resolved variables (env var > profile > default) ──────────────────
15
- ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE[ecrRepositoryName]:-ml-container-creator}}"
15
+ ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE_ecrRepositoryName:-ml-container-creator}}"
16
16
 
17
17
  # ── Derived variables (env var > computed default) ────────────────────────────
18
18
  CODEBUILD_PROJECT_NAME="${CODEBUILD_PROJECT_NAME:-${PROJECT_NAME}-build-$(date +%Y%m%d)}"
package/templates/do/tune CHANGED
@@ -16,7 +16,7 @@ source "${SCRIPT_DIR}/config"
16
16
  source "${SCRIPT_DIR}/lib/profile.sh"
17
17
 
18
18
  # ── Profile-resolved variables (env var > profile > default) ──────────────────
19
- TUNE_S3_BUCKET="${TUNE_S3_BUCKET:-mlcc-tune-${_PROFILE[accountId]:-unknown}-${_PROFILE[awsRegion]:-us-east-1}}"
19
+ TUNE_S3_BUCKET="${TUNE_S3_BUCKET:-mlcc-tune-${_PROFILE_accountId:-unknown}-${_PROFILE_awsRegion:-us-east-1}}"
20
20
 
21
21
  # ── Constants ─────────────────────────────────────────────────────────────────
22
22
  CATALOG_FILE="${SCRIPT_DIR}/.tune_catalog.json"