@aws/ml-container-creator 0.12.1 → 0.13.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +11 -3
- package/servers/instance-sizer/lib/model-resolver.js +127 -185
- package/servers/instance-sizer/lib/vram-estimator.js +86 -0
- package/servers/lib/catalogs/instances.json +0 -27
- package/src/app.js +14 -0
- package/src/lib/bootstrap-command-handler.js +2 -2
- package/src/lib/generated/cli-options.js +1 -1
- package/src/lib/generated/parameter-matrix.js +1 -1
- package/src/lib/generated/validation-rules.js +1 -1
- package/src/lib/prompt-runner.js +14 -31
- package/src/lib/prove-pipeline-executor.js +294 -0
- package/templates/IAM_PERMISSIONS.md +64 -13
- package/templates/do/.tune_helper.py +5 -2
- package/templates/do/README.md +50 -567
- package/templates/do/adapter +1 -1
- package/templates/do/build +2 -2
- package/templates/do/clean.d/async-inference.ejs +2 -2
- package/templates/do/clean.d/batch-transform.ejs +2 -2
- package/templates/do/clean.d/hyperpod-eks.ejs +2 -2
- package/templates/do/clean.d/managed-inference.ejs +2 -2
- package/templates/do/deploy.d/async-inference.ejs +6 -6
- package/templates/do/deploy.d/batch-transform.ejs +4 -4
- package/templates/do/deploy.d/hyperpod-eks.ejs +1 -1
- package/templates/do/deploy.d/managed-inference.ejs +15 -3
- package/templates/do/lib/profile.sh +19 -15
- package/templates/do/lib/staged-assets.sh +217 -0
- package/templates/do/push +2 -2
- package/templates/do/register +2 -2
- package/templates/do/stage +38 -33
- package/templates/do/submit +1 -1
- package/templates/do/tune +1 -1
- package/templates/MIGRATION.md +0 -488
- package/templates/TEMPLATE_SYSTEM.md +0 -243
- package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
|
@@ -41,15 +41,15 @@ source "${SCRIPT_DIR}/config"
|
|
|
41
41
|
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
42
42
|
|
|
43
43
|
# ── Profile-resolved variables (env var > profile > default) ──────────────────
|
|
44
|
-
ROLE_ARN="${ROLE_ARN:-${
|
|
45
|
-
ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${
|
|
46
|
-
export AWS_REGION="${AWS_REGION:-${
|
|
44
|
+
ROLE_ARN="${ROLE_ARN:-${_PROFILE_roleArn:-}}"
|
|
45
|
+
ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE_ecrRepositoryName:-ml-container-creator}}"
|
|
46
|
+
export AWS_REGION="${AWS_REGION:-${_PROFILE_awsRegion:-us-east-1}}"
|
|
47
47
|
|
|
48
48
|
# Async-specific derived variables
|
|
49
|
-
_ASYNC_BUCKET="${
|
|
49
|
+
_ASYNC_BUCKET="${_PROFILE_asyncS3Bucket:-mlcc-async-${_PROFILE_accountId:-unknown}-${_PROFILE_awsRegion:-us-east-1}}"
|
|
50
50
|
ASYNC_S3_OUTPUT_PATH="${ASYNC_S3_OUTPUT_PATH:-s3://${_ASYNC_BUCKET}/${PROJECT_NAME}/output/}"
|
|
51
|
-
ASYNC_SNS_SUCCESS_TOPIC="${ASYNC_SNS_SUCCESS_TOPIC:-arn:aws:sns:${
|
|
52
|
-
ASYNC_SNS_ERROR_TOPIC="${ASYNC_SNS_ERROR_TOPIC:-arn:aws:sns:${
|
|
51
|
+
ASYNC_SNS_SUCCESS_TOPIC="${ASYNC_SNS_SUCCESS_TOPIC:-arn:aws:sns:${_PROFILE_awsRegion:-us-east-1}:${_PROFILE_accountId:-unknown}:ml-container-creator-${PROJECT_NAME}-async-success}"
|
|
52
|
+
ASYNC_SNS_ERROR_TOPIC="${ASYNC_SNS_ERROR_TOPIC:-arn:aws:sns:${_PROFILE_awsRegion:-us-east-1}:${_PROFILE_accountId:-unknown}:ml-container-creator-${PROJECT_NAME}-async-error}"
|
|
53
53
|
|
|
54
54
|
echo "🚀 Deploying to AWS"
|
|
55
55
|
echo " Project: ${PROJECT_NAME}"
|
|
@@ -41,12 +41,12 @@ source "${SCRIPT_DIR}/config"
|
|
|
41
41
|
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
42
42
|
|
|
43
43
|
# ── Profile-resolved variables (env var > profile > default) ──────────────────
|
|
44
|
-
ROLE_ARN="${ROLE_ARN:-${
|
|
45
|
-
ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${
|
|
46
|
-
export AWS_REGION="${AWS_REGION:-${
|
|
44
|
+
ROLE_ARN="${ROLE_ARN:-${_PROFILE_roleArn:-}}"
|
|
45
|
+
ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE_ecrRepositoryName:-ml-container-creator}}"
|
|
46
|
+
export AWS_REGION="${AWS_REGION:-${_PROFILE_awsRegion:-us-east-1}}"
|
|
47
47
|
|
|
48
48
|
# Batch-specific derived variables
|
|
49
|
-
_BATCH_BUCKET="${
|
|
49
|
+
_BATCH_BUCKET="${_PROFILE_batchS3Bucket:-mlcc-batch-${_PROFILE_accountId:-unknown}-${_PROFILE_awsRegion:-us-east-1}}"
|
|
50
50
|
BATCH_INPUT_PATH="${BATCH_INPUT_PATH:-s3://${_BATCH_BUCKET}/${PROJECT_NAME}/input/}"
|
|
51
51
|
BATCH_OUTPUT_PATH="${BATCH_OUTPUT_PATH:-s3://${_BATCH_BUCKET}/${PROJECT_NAME}/output/}"
|
|
52
52
|
|
|
@@ -41,7 +41,7 @@ source "${SCRIPT_DIR}/config"
|
|
|
41
41
|
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
42
42
|
|
|
43
43
|
# ── Profile-resolved variables (env var > profile > default) ──────────────────
|
|
44
|
-
export AWS_REGION="${AWS_REGION:-${
|
|
44
|
+
export AWS_REGION="${AWS_REGION:-${_PROFILE_awsRegion:-us-east-1}}"
|
|
45
45
|
|
|
46
46
|
echo "🚀 Deploying to AWS"
|
|
47
47
|
echo " Project: ${PROJECT_NAME}"
|
|
@@ -214,9 +214,9 @@ source "${SCRIPT_DIR}/config"
|
|
|
214
214
|
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
215
215
|
|
|
216
216
|
# ── Profile-resolved variables (env var > profile > default) ──────────────────
|
|
217
|
-
ROLE_ARN="${ROLE_ARN:-${
|
|
218
|
-
ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${
|
|
219
|
-
export AWS_REGION="${AWS_REGION:-${
|
|
217
|
+
ROLE_ARN="${ROLE_ARN:-${_PROFILE_roleArn:-}}"
|
|
218
|
+
ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE_ecrRepositoryName:-ml-container-creator}}"
|
|
219
|
+
export AWS_REGION="${AWS_REGION:-${_PROFILE_awsRegion:-us-east-1}}"
|
|
220
220
|
|
|
221
221
|
echo "🚀 Deploying to AWS"
|
|
222
222
|
echo " Project: ${PROJECT_NAME}"
|
|
@@ -329,6 +329,18 @@ if [ -z "${ROLE_ARN:-}" ]; then
|
|
|
329
329
|
exit 3
|
|
330
330
|
fi
|
|
331
331
|
|
|
332
|
+
# Validate ROLE_ARN looks like an IAM role ARN
|
|
333
|
+
if ! echo "${ROLE_ARN}" | grep -qE '^arn:aws[a-z-]*:iam::[0-9]{12}:role/.+'; then
|
|
334
|
+
echo "❌ ROLE_ARN is not a valid IAM role ARN:"
|
|
335
|
+
echo " Got: ${ROLE_ARN}"
|
|
336
|
+
echo " Expected format: arn:aws:iam::123456789012:role/RoleName"
|
|
337
|
+
echo ""
|
|
338
|
+
echo " This may indicate a misconfigured bootstrap profile."
|
|
339
|
+
echo " Check ~/.ml-container-creator/config.json 'roleArn' field,"
|
|
340
|
+
echo " or set the correct value: export ROLE_ARN=arn:aws:iam::ACCOUNT:role/YOUR_ROLE"
|
|
341
|
+
exit 3
|
|
342
|
+
fi
|
|
343
|
+
|
|
332
344
|
echo " Using execution role: ${ROLE_ARN}"
|
|
333
345
|
|
|
334
346
|
# Validate --ic argument if specified (set by --ic <name> or --force-ic <name>)
|
|
@@ -1,21 +1,27 @@
|
|
|
1
1
|
#!/usr/bin/env bash
|
|
2
|
-
# Profile loader — reads active bootstrap profile into
|
|
2
|
+
# Profile loader — reads active bootstrap profile into _PROFILE_<key> variables.
|
|
3
3
|
# Source this file after do/config. Values provide defaults; explicit env vars take precedence.
|
|
4
4
|
#
|
|
5
|
-
#
|
|
6
|
-
#
|
|
5
|
+
# Portable across bash versions: works on bash 3.2+ (macOS default) and bash 4+/5+.
|
|
6
|
+
# No associative arrays required.
|
|
7
7
|
#
|
|
8
|
-
#
|
|
8
|
+
# After sourcing, access values via:
|
|
9
|
+
# ${_PROFILE_roleArn:-}
|
|
10
|
+
# ${_PROFILE_ecrRepositoryName:-ml-container-creator}
|
|
11
|
+
# ${_PROFILE_awsRegion:-us-east-1}
|
|
12
|
+
# ${_PROFILE_accountId:-}
|
|
13
|
+
# ${_PROFILE_benchmarkS3Bucket:-}
|
|
14
|
+
# ${_PROFILE_asyncS3Bucket:-}
|
|
15
|
+
# ${_PROFILE_batchS3Bucket:-}
|
|
16
|
+
#
|
|
17
|
+
# Expected keys (set as _PROFILE_<key>):
|
|
9
18
|
# awsRegion, accountId, awsProfile, roleArn, ecrRepositoryName,
|
|
10
19
|
# benchmarkS3Bucket, ciBenchmarkResultsBucket, asyncS3Bucket, batchS3Bucket,
|
|
11
20
|
# ciTableName, ciInfraProvisioned
|
|
12
21
|
|
|
13
22
|
# Temporarily disable unbound variable checking for profile loading
|
|
14
|
-
# (keys may not exist in the profile config, and declare -A behavior
|
|
15
|
-
# varies across bash versions with set -u)
|
|
16
23
|
set +u 2>/dev/null || true
|
|
17
24
|
|
|
18
|
-
declare -A _PROFILE 2>/dev/null || true
|
|
19
25
|
# _profile_load()
#   Read the active bootstrap profile from ~/.ml-container-creator/config.json
#   and define one global _PROFILE_<key> variable per scalar profile entry.
#
#   Best-effort: when python3 is missing, or the config file cannot be read or
#   parsed, no variables are defined and nothing is reported.
#
#   Safety: the generated assignments are passed to eval, so every value is
#   emitted through shlex.quote (quotes, $(...) and backticks inside a profile
#   value stay literal data), and keys are restricted to ASCII letters, digits
#   and underscores so "_PROFILE_<key>" is always a valid shell identifier.
#
# Arguments: none
# Globals written: _PROFILE_RAW, _PROFILE_<key>
_profile_load() {
    command -v python3 >/dev/null 2>&1 || return 0

    # The Python program is single-quoted for the shell, so it must not
    # contain single quotes itself.
    _PROFILE_RAW=$(python3 -c '
import json, os, re, shlex
try:
    with open(os.path.expanduser("~/.ml-container-creator/config.json")) as f:
        c = json.load(f)
    p = c["profiles"][c["activeProfile"]]
    # Output as _PROFILE_KEY=VALUE lines, one shell-quoted assignment per line
    for k, v in p.items():
        if not isinstance(v, (str, int, float, bool)):
            continue
        # Only ASCII identifier characters may become part of a variable name
        if re.fullmatch(r"[A-Za-z0-9_]+", k):
            print("_PROFILE_%s=%s" % (k, shlex.quote(str(v))))
except Exception:
    pass
' 2>/dev/null) || _PROFILE_RAW=""

    if [ -n "${_PROFILE_RAW}" ]; then
        eval "${_PROFILE_RAW}"
    fi
}

_profile_load
|
|
40
46
|
|
|
41
47
|
# NOTE: set -u is NOT re-enabled here. The caller is responsible for managing
|
|
42
|
-
# their own shell options.
|
|
43
|
-
# errors when accessing _PROFILE keys on bash versions where empty associative
|
|
44
|
-
# arrays are treated as unset (bash 5.x on some platforms).
|
|
48
|
+
# their own shell options.
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Shared helper: read/write the .mlcc/staged-assets.json tracking file.
|
|
5
|
+
# Sourced by do/stage, do/submit, and other lifecycle scripts.
|
|
6
|
+
#
|
|
7
|
+
# ─── Schema (.mlcc/staged-assets.json) ───────────────────────────────────────
|
|
8
|
+
#
|
|
9
|
+
# {
|
|
10
|
+
# "version": "1",
|
|
11
|
+
# "models": {
|
|
12
|
+
# "<ic-name>": {
|
|
13
|
+
# "source": "<HuggingFace model ID, e.g. google/gemma-4-31B-it>",
|
|
14
|
+
# "staged_uri": "<S3 URI with trailing slash>",
|
|
15
|
+
# "staged_at": "<ISO 8601 timestamp>",
|
|
16
|
+
# "region": "<AWS region where the model was staged>",
|
|
17
|
+
# "size_gb": <numeric size in GB>
|
|
18
|
+
# }
|
|
19
|
+
# },
|
|
20
|
+
# "adapters": {}
|
|
21
|
+
# }
|
|
22
|
+
#
|
|
23
|
+
# Notes:
|
|
24
|
+
# - "version" is for forward-compatible schema evolution (start at "1")
|
|
25
|
+
# - "models" is keyed by IC name; use "default" for single-model projects
|
|
26
|
+
# - "adapters" is reserved for future LoRA adapter staging (BL-122)
|
|
27
|
+
# - This file is git-ignored (.mlcc/ contains account-specific URIs)
|
|
28
|
+
# - The file SHALL NOT be created unless a valid staging operation completes
|
|
29
|
+
# ──────────────────────────────────────────────────────────────────────────────
|
|
30
|
+
|
|
31
|
+
# Path to the staged-assets file (relative path: resolved against the caller's working directory, expected to be the project root)
|
|
32
|
+
STAGED_ASSETS_DIR=".mlcc"
|
|
33
|
+
STAGED_ASSETS_FILE="${STAGED_ASSETS_DIR}/staged-assets.json"
|
|
34
|
+
|
|
35
|
+
# _staged_assets_has_jq()
#   Report whether a `jq` command can be resolved in the current shell.
#   Exit status is 0 when it can, non-zero when it cannot.
_staged_assets_has_jq() {
    command -v jq >/dev/null 2>&1
}
|
|
41
|
+
|
|
42
|
+
# _staged_assets_warn_no_jq()
#   Emit the "jq not found" notice on stderr the first time it is called in
#   this shell; later calls are silent. The _STAGED_ASSETS_JQ_WARNED variable
#   records that the notice has already been shown.
_staged_assets_warn_no_jq() {
    # Already warned once: nothing more to do.
    [ -n "${_STAGED_ASSETS_JQ_WARNED:-}" ] && return 0

    echo "⚠️ jq not found — using fallback parser (install jq for full functionality)" >&2
    _STAGED_ASSETS_JQ_WARNED=1
}
|
|
50
|
+
|
|
51
|
+
# staged_assets_read_model_uri()
#   Print the staged S3 URI recorded for the "default" model in
#   ${STAGED_ASSETS_FILE}. Prints an empty line when the file is missing or
#   no URI can be extracted.
#
#   With jq available the value is read from .models.default.staged_uri.
#   Without jq, the "staged_uri" line is looked up within the five lines that
#   follow the first line containing "default" (grep/sed extraction).
#
# Arguments: none
# Output: S3 URI string (stdout) or empty string
staged_assets_read_model_uri() {
    local staged_uri=""

    if [ -f "${STAGED_ASSETS_FILE}" ]; then
        if ! _staged_assets_has_jq; then
            _staged_assets_warn_no_jq
            # Text-based extraction for the single-model layout
            staged_uri=$(
                grep -A 5 '"default"' "${STAGED_ASSETS_FILE}" 2>/dev/null \
                    | grep '"staged_uri"' \
                    | sed 's/.*"staged_uri"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/' \
                    | head -1
            ) || staged_uri=""
        else
            staged_uri=$(jq -r '.models.default.staged_uri // empty' "${STAGED_ASSETS_FILE}" 2>/dev/null) || staged_uri=""
        fi
    fi

    echo "${staged_uri}"
}
|
|
82
|
+
|
|
83
|
+
# _staged_assets_json_escape()
#   Escape backslashes and double quotes so a value can be placed inside a
#   JSON string literal by the no-jq writer. Control characters (including
#   newlines) are not handled.
#
# Arguments:
#   $1 - raw string
# Output: escaped string (stdout)
_staged_assets_json_escape() {
    printf '%s' "$1" | sed -e 's/\\/\\\\/g' -e 's/"/\\"/g'
}

# staged_assets_write_model()
#   Create or update the staged-assets file with model staging information
#   for the "default" model. Creates the .mlcc directory if it does not exist.
#
#   The document is always built in a temporary sibling file and moved into
#   place only after it was produced successfully, so a failed write never
#   leaves an empty or partial ${STAGED_ASSETS_FILE} behind and never
#   destroys an existing one.
#
#   With jq: an existing non-empty file is merged (other entries are kept);
#   a missing, empty or whitespace-only file is (re)created from scratch.
#   Without jq: the file is rewritten from scratch (create-only, no merge).
#
# Arguments:
#   $1 - source:  HuggingFace model ID (e.g. "google/gemma-4-31B-it")
#   $2 - uri:     S3 URI where the model was staged (with trailing slash)
#   $3 - region:  AWS region where the model was staged
#   $4 - size_gb: Total size of the staged model in GB (JSON number)
# Returns:
#   0 on success; 1 when size_gb is not a JSON number or the file could not
#   be written (the previous file content, if any, is left untouched).
staged_assets_write_model() {
    local source="$1"
    local uri="$2"
    local region="$3"
    local size_gb="$4"
    local timestamp
    local tmp_file="${STAGED_ASSETS_FILE}.tmp.$$"
    local number_re='^-?(0|[1-9][0-9]*)([.][0-9]+)?([eE][+-]?[0-9]+)?$'

    # size_gb is inserted as a bare JSON number (jq --argjson / heredoc), so
    # anything else would either make jq fail or produce invalid JSON.
    if ! [[ "${size_gb}" =~ ${number_re} ]]; then
        echo "❌ staged_assets_write_model: size_gb must be numeric (got: '${size_gb}')" >&2
        return 1
    fi

    timestamp=$(date -u +%Y-%m-%dT%H:%M:%SZ)

    # Ensure the .mlcc directory exists
    mkdir -p "${STAGED_ASSETS_DIR}" || return 1

    if _staged_assets_has_jq; then
        if [ -s "${STAGED_ASSETS_FILE}" ]; then
            # Update existing file: merge the new model entry
            if ! jq --arg source "${source}" \
                    --arg uri "${uri}" \
                    --arg ts "${timestamp}" \
                    --arg region "${region}" \
                    --argjson size "${size_gb}" \
                    '.models.default = {
                        "source": $source,
                        "staged_uri": $uri,
                        "staged_at": $ts,
                        "region": $region,
                        "size_gb": $size
                    }' "${STAGED_ASSETS_FILE}" > "${tmp_file}"; then
                rm -f "${tmp_file}"
                return 1
            fi
        fi

        # No usable merge result (no file, empty file, or a file holding no
        # JSON value, for which jq prints nothing): create from scratch.
        if [ ! -s "${tmp_file}" ]; then
            if ! jq -n --arg source "${source}" \
                    --arg uri "${uri}" \
                    --arg ts "${timestamp}" \
                    --arg region "${region}" \
                    --argjson size "${size_gb}" \
                    '{
                        "version": "1",
                        "models": {
                            "default": {
                                "source": $source,
                                "staged_uri": $uri,
                                "staged_at": $ts,
                                "region": $region,
                                "size_gb": $size
                            }
                        },
                        "adapters": {}
                    }' > "${tmp_file}"; then
                rm -f "${tmp_file}"
                return 1
            fi
        fi
    else
        _staged_assets_warn_no_jq
        # Fallback: write the JSON directly (create-only, no merge support without jq)
        local esc_source esc_uri esc_region
        esc_source=$(_staged_assets_json_escape "${source}")
        esc_uri=$(_staged_assets_json_escape "${uri}")
        esc_region=$(_staged_assets_json_escape "${region}")

        if ! cat > "${tmp_file}" << EOF
{
  "version": "1",
  "models": {
    "default": {
      "source": "${esc_source}",
      "staged_uri": "${esc_uri}",
      "staged_at": "${timestamp}",
      "region": "${esc_region}",
      "size_gb": ${size_gb}
    }
  },
  "adapters": {}
}
EOF
        then
            rm -f "${tmp_file}"
            return 1
        fi
    fi

    if ! mv "${tmp_file}" "${STAGED_ASSETS_FILE}"; then
        rm -f "${tmp_file}"
        return 1
    fi
}
|
|
160
|
+
|
|
161
|
+
# staged_assets_status()
#   Print a human-readable summary of ${STAGED_ASSETS_FILE}.
#
#   With jq: a table of models (IC name, source, S3 URI, region, size) and,
#   when present, a list of adapters with their staged URI.
#   Without jq: the raw file contents.
#   When the file does not exist, a short hint to run do/stage is printed.
#
# Arguments: none
# Output: formatted table to stdout
staged_assets_status() {
    if [ ! -f "${STAGED_ASSETS_FILE}" ]; then
        echo "No staged assets found."
        echo " Run do/stage to stage model weights to S3."
        return 0
    fi

    echo "Staged Assets (.mlcc/staged-assets.json)"
    echo "─────────────────────────────────────────────────────────────────"

    if _staged_assets_has_jq; then
        local model_count adapter_count
        # Field separator for table rows: ASCII unit separator. Unlike a tab
        # it is not IFS whitespace, so `read` keeps empty fields in place
        # instead of collapsing them and shifting the remaining columns.
        local field_sep=$'\037'

        # Print models section
        model_count=$(jq -r '.models | length' "${STAGED_ASSETS_FILE}" 2>/dev/null) || model_count=0
        # jq prints nothing (and still exits 0) for an empty file; treat any
        # non-integer result as "no entries" so the numeric test cannot fail.
        case "${model_count}" in
            '' | *[!0-9]*) model_count=0 ;;
        esac

        if [ "${model_count}" -gt 0 ]; then
            echo ""
            echo " Models:"
            echo " ┌──────────────┬─────────────────────────────────┬──────────────────────────────────────────────────────┬────────────┬─────────┐"
            printf " │ %-12s │ %-31s │ %-52s │ %-10s │ %-7s │\n" "IC Name" "Source" "S3 URI" "Region" "Size"
            echo " ├──────────────┼─────────────────────────────────┼──────────────────────────────────────────────────────┼────────────┼─────────┤"

            jq -r '.models | to_entries[] | "\(.key)\u001f\(.value.source)\u001f\(.value.staged_uri)\u001f\(.value.region)\u001f\(.value.size_gb)"' "${STAGED_ASSETS_FILE}" 2>/dev/null | \
                while IFS="${field_sep}" read -r ic_name model_source staged_uri region size_gb; do
                    printf " │ %-12s │ %-31s │ %-52s │ %-10s │ %5s GB│\n" \
                        "${ic_name}" "${model_source}" "${staged_uri}" "${region}" "${size_gb}"
                done

            echo " └──────────────┴─────────────────────────────────┴──────────────────────────────────────────────────────┴────────────┴─────────┘"
        fi

        # Print adapters section (only when the file lists any)
        adapter_count=$(jq -r '.adapters | length' "${STAGED_ASSETS_FILE}" 2>/dev/null) || adapter_count=0
        case "${adapter_count}" in
            '' | *[!0-9]*) adapter_count=0 ;;
        esac

        if [ "${adapter_count}" -gt 0 ]; then
            echo ""
            echo " Adapters:"
            jq -r '.adapters | to_entries[] | " \(.key): \(.value.staged_uri // "not staged")"' "${STAGED_ASSETS_FILE}" 2>/dev/null
        fi
    else
        _staged_assets_warn_no_jq
        # Fallback: basic display without jq
        echo ""
        echo " Raw contents:"
        echo ""
        cat "${STAGED_ASSETS_FILE}"
    fi

    echo ""
}
|
package/templates/do/push
CHANGED
|
@@ -12,8 +12,8 @@ source "${SCRIPT_DIR}/config"
|
|
|
12
12
|
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
13
13
|
|
|
14
14
|
# ── Profile-resolved variables (env var > profile > default) ──────────────────
|
|
15
|
-
ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${
|
|
16
|
-
export AWS_REGION="${AWS_REGION:-${
|
|
15
|
+
ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE_ecrRepositoryName:-ml-container-creator}}"
|
|
16
|
+
export AWS_REGION="${AWS_REGION:-${_PROFILE_awsRegion:-us-east-1}}"
|
|
17
17
|
|
|
18
18
|
echo "🚀 Pushing Docker image to Amazon ECR"
|
|
19
19
|
echo " Project: ${PROJECT_NAME}"
|
package/templates/do/register
CHANGED
|
@@ -12,8 +12,8 @@ source "${SCRIPT_DIR}/config"
|
|
|
12
12
|
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
13
13
|
|
|
14
14
|
# ── Profile-resolved variables (env var > profile > default) ──────────────────
|
|
15
|
-
ROLE_ARN="${ROLE_ARN:-${
|
|
16
|
-
ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${
|
|
15
|
+
ROLE_ARN="${ROLE_ARN:-${_PROFILE_roleArn:-}}"
|
|
16
|
+
ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE_ecrRepositoryName:-ml-container-creator}}"
|
|
17
17
|
|
|
18
18
|
# ============================================================
|
|
19
19
|
# Register deployment to the deployment registry
|
package/templates/do/stage
CHANGED
|
@@ -23,40 +23,42 @@ set -o pipefail
|
|
|
23
23
|
# ── Source project configuration ──────────────────────────────────────────────
|
|
24
24
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
25
25
|
source "${SCRIPT_DIR}/config"
|
|
26
|
+
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
27
|
+
source "${SCRIPT_DIR}/lib/staged-assets.sh"
|
|
26
28
|
|
|
27
29
|
# ── Parse flags ───────────────────────────────────────────────────────────────
|
|
28
30
|
FORCE=false
|
|
29
|
-
UPDATE_CONFIG=
|
|
31
|
+
UPDATE_CONFIG=true
|
|
30
32
|
SUBMIT_MODE=false
|
|
31
33
|
NO_WAIT=false
|
|
32
34
|
while [ $# -gt 0 ]; do
|
|
33
35
|
case "$1" in
|
|
34
36
|
--force) FORCE=true; shift ;;
|
|
35
|
-
--update-config) UPDATE_CONFIG=true; shift ;;
|
|
37
|
+
--update-config) UPDATE_CONFIG=true; shift ;; # default, kept for backward compat
|
|
38
|
+
--no-update-config) UPDATE_CONFIG=false; shift ;;
|
|
36
39
|
--submit) SUBMIT_MODE=true; shift ;;
|
|
37
40
|
--no-wait) NO_WAIT=true; shift ;;
|
|
38
41
|
--help|-h)
|
|
39
|
-
echo "Usage: ./do/stage [--force] [--update-config] [--submit] [--no-wait]"
|
|
42
|
+
echo "Usage: ./do/stage [--force] [--no-update-config] [--submit] [--no-wait]"
|
|
40
43
|
echo ""
|
|
41
44
|
echo "Pre-stage model weights from HuggingFace to S3."
|
|
45
|
+
echo "On success, updates MODEL_NAME in do/config so subsequent tasks"
|
|
46
|
+
echo "(submit, deploy) pull from S3 with HuggingFace as fallback."
|
|
42
47
|
echo ""
|
|
43
48
|
echo "Modes:"
|
|
44
49
|
echo " (default) Download locally then sync to S3"
|
|
45
50
|
echo " --submit Submit as SageMaker Processing Job (for models >500GB)"
|
|
46
51
|
echo ""
|
|
47
52
|
echo "Options:"
|
|
48
|
-
echo " --force
|
|
49
|
-
echo " --update-config
|
|
50
|
-
echo " --no-wait
|
|
53
|
+
echo " --force Re-stage even if model already exists in S3"
|
|
54
|
+
echo " --no-update-config Do NOT update MODEL_NAME in do/config after staging"
|
|
55
|
+
echo " --no-wait (with --submit) Exit without polling for completion"
|
|
51
56
|
echo ""
|
|
52
57
|
echo "Environment:"
|
|
53
58
|
echo " HF_TOKEN HuggingFace token (for gated models)"
|
|
54
59
|
echo ""
|
|
55
60
|
echo "The staged S3 URI will be printed on completion."
|
|
56
|
-
echo "
|
|
57
|
-
echo ""
|
|
58
|
-
echo "The --submit mode uses a SageMaker Processing Job with 2TB attached"
|
|
59
|
-
echo "storage, suitable for very large models that exceed local disk capacity."
|
|
61
|
+
echo "MODEL_NAME in do/config is updated automatically unless --no-update-config is passed."
|
|
60
62
|
exit 0
|
|
61
63
|
;;
|
|
62
64
|
*) shift ;;
|
|
@@ -126,11 +128,16 @@ echo "Target: ${S3_OUTPUT_URI}"
|
|
|
126
128
|
echo ""
|
|
127
129
|
|
|
128
130
|
# Install dependencies
|
|
129
|
-
echo "📦
|
|
130
|
-
pip install -q huggingface_hub[cli] hf_transfer
|
|
131
|
+
echo "📦 Checking huggingface-cli and hf_transfer..."
|
|
132
|
+
pip install -q huggingface_hub[cli] hf_transfer 2>/dev/null || true
|
|
131
133
|
|
|
132
|
-
# Enable fast parallel downloads
|
|
133
|
-
|
|
134
|
+
# Enable fast parallel downloads only if hf_transfer is available
|
|
135
|
+
if python3 -c "import hf_transfer" 2>/dev/null; then
|
|
136
|
+
export HF_HUB_ENABLE_HF_TRANSFER=1
|
|
137
|
+
else
|
|
138
|
+
echo " ℹ️ hf_transfer not available — using standard download (install with: pip install hf_transfer)"
|
|
139
|
+
unset HF_HUB_ENABLE_HF_TRANSFER 2>/dev/null || true
|
|
140
|
+
fi
|
|
134
141
|
|
|
135
142
|
# Set HF token if provided
|
|
136
143
|
if [ -n "${HF_TOKEN:-}" ]; then
|
|
@@ -324,15 +331,11 @@ print(d.get('FailureReason', ''))
|
|
|
324
331
|
CONFIG_FILE="${SCRIPT_DIR}/config"
|
|
325
332
|
sed -i.bak "s|^export MODEL_NAME=.*|export MODEL_NAME=\"${MODEL_S3_URI}\"|" "${CONFIG_FILE}"
|
|
326
333
|
rm -f "${CONFIG_FILE}.bak"
|
|
327
|
-
echo " ✅ Updated MODEL_NAME in do/config →
|
|
328
|
-
echo ""
|
|
329
|
-
echo " Re-deploy with S3-backed model: ./do/deploy"
|
|
334
|
+
echo " ✅ Updated MODEL_NAME in do/config → S3-backed"
|
|
335
|
+
echo " Subsequent tasks (submit, deploy) will pull from S3."
|
|
330
336
|
else
|
|
331
337
|
echo " To use this staged model, update do/config:"
|
|
332
338
|
echo " export MODEL_NAME=\"${MODEL_S3_URI}\""
|
|
333
|
-
echo ""
|
|
334
|
-
echo " Or re-run with --update-config:"
|
|
335
|
-
echo " ./do/stage --submit --update-config"
|
|
336
339
|
fi
|
|
337
340
|
return 0
|
|
338
341
|
;;
|
|
@@ -431,7 +434,7 @@ if [ "${FORCE}" = false ]; then
|
|
|
431
434
|
CONFIG_FILE="${SCRIPT_DIR}/config"
|
|
432
435
|
sed -i.bak "s|^export MODEL_NAME=.*|export MODEL_NAME=\"${MODEL_S3_URI}\"|" "${CONFIG_FILE}"
|
|
433
436
|
rm -f "${CONFIG_FILE}.bak"
|
|
434
|
-
echo " ✅ Updated MODEL_NAME in do/config →
|
|
437
|
+
echo " ✅ Updated MODEL_NAME in do/config → S3-backed"
|
|
435
438
|
else
|
|
436
439
|
echo " To use this staged model, set in do/config:"
|
|
437
440
|
echo " export MODEL_NAME=\"${MODEL_S3_URI}\""
|
|
@@ -472,13 +475,21 @@ fi
|
|
|
472
475
|
|
|
473
476
|
# ── Download model from HuggingFace ──────────────────────────────────────────
|
|
474
477
|
echo "⬇️ Downloading model from HuggingFace: ${MODEL_NAME}"
|
|
475
|
-
|
|
478
|
+
if python3 -c "import hf_transfer" 2>/dev/null; then
|
|
479
|
+
echo " Using hf_transfer for fast parallel downloads..."
|
|
480
|
+
else
|
|
481
|
+
echo " Using standard downloads (install hf_transfer for faster staging)..."
|
|
482
|
+
fi
|
|
476
483
|
echo ""
|
|
477
484
|
|
|
478
|
-
# Enable fast parallel downloads via hf_transfer
|
|
479
|
-
|
|
485
|
+
# Enable fast parallel downloads via hf_transfer (if available)
|
|
486
|
+
if python3 -c "import hf_transfer" 2>/dev/null; then
|
|
487
|
+
export HF_HUB_ENABLE_HF_TRANSFER=1
|
|
488
|
+
else
|
|
489
|
+
unset HF_HUB_ENABLE_HF_TRANSFER 2>/dev/null || true
|
|
490
|
+
fi
|
|
480
491
|
|
|
481
|
-
# Download to HF cache
|
|
492
|
+
# Download to HF cache
|
|
482
493
|
DOWNLOAD_ARGS=("${MODEL_NAME}")
|
|
483
494
|
if [ -n "${HF_TOKEN:-}" ]; then
|
|
484
495
|
DOWNLOAD_ARGS+=("--token" "${HF_TOKEN}")
|
|
@@ -553,15 +564,9 @@ if [ "${UPDATE_CONFIG}" = true ]; then
|
|
|
553
564
|
CONFIG_FILE="${SCRIPT_DIR}/config"
|
|
554
565
|
sed -i.bak "s|^export MODEL_NAME=.*|export MODEL_NAME=\"${MODEL_S3_URI}\"|" "${CONFIG_FILE}"
|
|
555
566
|
rm -f "${CONFIG_FILE}.bak"
|
|
556
|
-
echo " ✅ Updated MODEL_NAME in do/config →
|
|
557
|
-
echo ""
|
|
558
|
-
echo " Re-deploy with S3-backed model: ./do/deploy"
|
|
567
|
+
echo " ✅ Updated MODEL_NAME in do/config → S3-backed"
|
|
568
|
+
echo " Subsequent tasks (submit, deploy) will pull from S3."
|
|
559
569
|
else
|
|
560
570
|
echo " To use this staged model, update do/config:"
|
|
561
571
|
echo " export MODEL_NAME=\"${MODEL_S3_URI}\""
|
|
562
|
-
echo ""
|
|
563
|
-
echo " Or re-run with --update-config to do it automatically:"
|
|
564
|
-
echo " ./do/stage --update-config"
|
|
565
|
-
echo ""
|
|
566
|
-
echo " Then re-deploy: ./do/deploy"
|
|
567
572
|
fi
|
package/templates/do/submit
CHANGED
|
@@ -12,7 +12,7 @@ source "${SCRIPT_DIR}/config"
|
|
|
12
12
|
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
13
13
|
|
|
14
14
|
# ── Profile-resolved variables (env var > profile > default) ──────────────────
|
|
15
|
-
ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${
|
|
15
|
+
ECR_REPOSITORY_NAME="${ECR_REPOSITORY_NAME:-${_PROFILE_ecrRepositoryName:-ml-container-creator}}"
|
|
16
16
|
|
|
17
17
|
# ── Derived variables (env var > computed default) ────────────────────────────
|
|
18
18
|
CODEBUILD_PROJECT_NAME="${CODEBUILD_PROJECT_NAME:-${PROJECT_NAME}-build-$(date +%Y%m%d)}"
|
package/templates/do/tune
CHANGED
|
@@ -16,7 +16,7 @@ source "${SCRIPT_DIR}/config"
|
|
|
16
16
|
source "${SCRIPT_DIR}/lib/profile.sh"
|
|
17
17
|
|
|
18
18
|
# ── Profile-resolved variables (env var > profile > default) ──────────────────
|
|
19
|
-
TUNE_S3_BUCKET="${TUNE_S3_BUCKET:-mlcc-tune-${
|
|
19
|
+
TUNE_S3_BUCKET="${TUNE_S3_BUCKET:-mlcc-tune-${_PROFILE_accountId:-unknown}-${_PROFILE_awsRegion:-us-east-1}}"
|
|
20
20
|
|
|
21
21
|
# ── Constants ─────────────────────────────────────────────────────────────────
|
|
22
22
|
CATALOG_FILE="${SCRIPT_DIR}/.tune_catalog.json"
|