@aws/ml-container-creator 0.13.5 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/parameter-schema-v2.json +32 -4
- package/infra/ci-harness/lib/ci-harness-stack.ts +13 -5
- package/infra/ci-harness/package-lock.json +121 -111
- package/infra/ci-harness/package.json +1 -1
- package/package.json +2 -2
- package/servers/instance-sizer/index.js +72 -4
- package/servers/instance-sizer/lib/model-resolver.js +28 -2
- package/src/app.js +15 -0
- package/src/lib/config-loader.js +18 -0
- package/src/lib/config-manager.js +6 -1
- package/src/lib/dataset-slug.js +152 -0
- package/src/lib/generated/cli-options.js +9 -3
- package/src/lib/generated/parameter-matrix.js +14 -3
- package/src/lib/generated/validation-rules.js +1 -1
- package/src/lib/mcp-query-runner.js +6 -0
- package/src/lib/prompt-runner.js +5 -0
- package/src/lib/prompts/feature-prompts.js +1 -1
- package/src/lib/template-manager.js +0 -7
- package/src/lib/template-variable-resolver.js +51 -1
- package/src/lib/tune-config-state.js +14 -1
- package/templates/do/.benchmark_writer.py +9 -0
- package/templates/do/.register_helper.py +1163 -0
- package/templates/do/.tune_helper.py +168 -2
- package/templates/do/__pycache__/.adapter_helper.cpython-312.pyc +0 -0
- package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
- package/templates/do/__pycache__/.register_helper.cpython-312.pyc +0 -0
- package/templates/do/__pycache__/.tune_helper.cpython-312.pyc +0 -0
- package/templates/do/adapter +319 -27
- package/templates/do/add-ic +85 -3
- package/templates/do/benchmark +28 -8
- package/templates/do/config +20 -0
- package/templates/do/lib/inference-component.sh +56 -3
- package/templates/do/register +552 -6
- package/templates/do/test +12 -2
- package/templates/do/tune +201 -6
package/templates/do/benchmark
CHANGED
|
@@ -39,17 +39,18 @@ while [ $# -gt 0 ]; do
|
|
|
39
39
|
echo "Options:"
|
|
40
40
|
echo " --status Check job status; if completed, download results + write to Athena"
|
|
41
41
|
echo " --ic <name> Benchmark a specific inference component"
|
|
42
|
-
echo " --adapter <name> Benchmark a specific LoRA adapter IC"
|
|
42
|
+
echo " --adapter <name> Benchmark a specific LoRA adapter (routes via adapter IC, records adapter_name in Athena)"
|
|
43
43
|
echo " --force Create a new benchmark job even if one is already running"
|
|
44
44
|
echo " --clean Delete workload config and benchmark job after displaying results"
|
|
45
45
|
echo " --no-stale-warning Suppress schema registry staleness warning"
|
|
46
|
-
echo " --no-stale-warning Suppress schema registry staleness warning"
|
|
47
46
|
echo ""
|
|
48
47
|
echo "IC resolution:"
|
|
49
48
|
echo " --adapter <name> Use ADAPTER_IC_NAME from do/adapters/<name>.conf"
|
|
50
49
|
echo " --ic <name> Use IC_DEPLOYED_NAME from do/ic/<name>.conf"
|
|
51
50
|
echo " (no flag) Use first IC in do/ic/ alphabetically, or legacy config"
|
|
52
51
|
echo ""
|
|
52
|
+
echo "Adapter benchmarks are differentiated from base model runs in Athena via the adapter_name column."
|
|
53
|
+
echo ""
|
|
53
54
|
echo "Status:"
|
|
54
55
|
echo " After interrupting a running benchmark, use --status to check completion"
|
|
55
56
|
echo " and trigger results download + Athena write."
|
|
@@ -113,10 +114,17 @@ if [ "${ARG_STATUS}" = true ]; then
|
|
|
113
114
|
aws s3 sync "${RESULTS_S3_PATH}" "${LOCAL_RESULTS_DIR}/output/" \
|
|
114
115
|
--region "${AWS_REGION}" --quiet
|
|
115
116
|
# Untar if output.tar.gz exists
|
|
116
|
-
|
|
117
|
+
tar_file=""
|
|
117
118
|
tar_file=$(find "${LOCAL_RESULTS_DIR}" -name "output.tar.gz" -type f 2>/dev/null | head -1)
|
|
118
119
|
if [ -n "${tar_file}" ]; then
|
|
119
|
-
|
|
120
|
+
# Detect whether tar has a leading directory prefix
|
|
121
|
+
_tar_first=""
|
|
122
|
+
_tar_first=$(tar -tzf "${tar_file}" 2>/dev/null | head -1)
|
|
123
|
+
if echo "${_tar_first}" | grep -qE '^[^/]+/$'; then
|
|
124
|
+
tar -xzf "${tar_file}" --strip-components=1 -C "${LOCAL_RESULTS_DIR}/output/" 2>/dev/null || true
|
|
125
|
+
else
|
|
126
|
+
tar -xzf "${tar_file}" -C "${LOCAL_RESULTS_DIR}/output/" 2>/dev/null || true
|
|
127
|
+
fi
|
|
120
128
|
fi
|
|
121
129
|
# Re-search after extraction
|
|
122
130
|
RESULTS_JSONL=$(find "${LOCAL_RESULTS_DIR}" -name "profile_export.jsonl" -type f 2>/dev/null | head -1)
|
|
@@ -145,7 +153,8 @@ if [ "${ARG_STATUS}" = true ]; then
|
|
|
145
153
|
--workload "${BENCHMARK_WORKLOAD:-manual}" \
|
|
146
154
|
--concurrency "${BENCHMARK_CONCURRENCY:-2}" \
|
|
147
155
|
--bucket "${CI_BENCHMARK_RESULTS_BUCKET}" \
|
|
148
|
-
--region "${AWS_REGION:-${REGION}}"
|
|
156
|
+
--region "${AWS_REGION:-${REGION}}" \
|
|
157
|
+
${ADAPTER_ARG:+--adapter-name "${ADAPTER_ARG}"}; then
|
|
149
158
|
echo " ✅ Results persisted to Athena"
|
|
150
159
|
else
|
|
151
160
|
echo " ⚠️ Athena write failed (non-fatal)"
|
|
@@ -549,7 +558,8 @@ print(f'Combined {n_metrics} concurrency level results')
|
|
|
549
558
|
--project-name "${PROJECT_NAME}" \
|
|
550
559
|
--workload "${BENCHMARK_WORKLOAD:-manual}" \
|
|
551
560
|
--bucket "${CI_BENCHMARK_RESULTS_BUCKET}" \
|
|
552
|
-
--region "${AWS_REGION:-${REGION}}"
|
|
561
|
+
--region "${AWS_REGION:-${REGION}}" \
|
|
562
|
+
${ADAPTER_ARG:+--adapter-name "${ADAPTER_ARG}"}; then
|
|
553
563
|
echo "✅ Multi-level benchmark results persisted to S3"
|
|
554
564
|
else
|
|
555
565
|
echo "⚠️ Failed to persist multi-level benchmark results to Athena (non-fatal)"
|
|
@@ -1087,7 +1097,16 @@ if [ "${JOB_STATUS}" = "Completed" ]; then
|
|
|
1087
1097
|
# Extract any tar.gz archives (benchmark service packages results as output.tar.gz)
|
|
1088
1098
|
for ARCHIVE in $(find "${LOCAL_RESULTS_DIR}" -name "*.tar.gz" -type f 2>/dev/null); do
|
|
1089
1099
|
ARCHIVE_DIR=$(dirname "${ARCHIVE}")
|
|
1090
|
-
tar
|
|
1100
|
+
# Detect whether tar has a leading directory prefix to strip.
|
|
1101
|
+
# Some AIPerf versions wrap in output/, others are flat.
|
|
1102
|
+
_TAR_FIRST=$(tar -tzf "${ARCHIVE}" 2>/dev/null | head -1)
|
|
1103
|
+
if echo "${_TAR_FIRST}" | grep -qE '^[^/]+/$'; then
|
|
1104
|
+
# Leading directory (e.g., "output/") — strip it
|
|
1105
|
+
tar -xzf "${ARCHIVE}" --strip-components=1 -C "${ARCHIVE_DIR}" 2>/dev/null || true
|
|
1106
|
+
else
|
|
1107
|
+
# Flat archive — extract as-is
|
|
1108
|
+
tar -xzf "${ARCHIVE}" -C "${ARCHIVE_DIR}" 2>/dev/null || true
|
|
1109
|
+
fi
|
|
1091
1110
|
done
|
|
1092
1111
|
|
|
1093
1112
|
# Look for specific result files (priority: JSONL > aiperf JSON)
|
|
@@ -1346,7 +1365,8 @@ except Exception as e:
|
|
|
1346
1365
|
--workload "${BENCHMARK_WORKLOAD:-manual}" \
|
|
1347
1366
|
--concurrency "${BENCHMARK_CONCURRENCY}" \
|
|
1348
1367
|
--bucket "${CI_BENCHMARK_RESULTS_BUCKET}" \
|
|
1349
|
-
--region "${AWS_REGION:-${REGION}}"
|
|
1368
|
+
--region "${AWS_REGION:-${REGION}}" \
|
|
1369
|
+
${ADAPTER_ARG:+--adapter-name "${ADAPTER_ARG}"}; then
|
|
1350
1370
|
echo "✅ Benchmark results persisted to S3"
|
|
1351
1371
|
else
|
|
1352
1372
|
echo "⚠️ Failed to persist benchmark results to Athena (non-fatal)"
|
package/templates/do/config
CHANGED
|
@@ -211,6 +211,26 @@ export <%= key %>=${<%= key %>:-<%= value %>}
|
|
|
211
211
|
<% }); %>
|
|
212
212
|
<% } %>
|
|
213
213
|
|
|
214
|
+
<% if (typeof icEnvVars !== 'undefined' && icEnvVars && Object.keys(icEnvVars).length > 0) { %>
|
|
215
|
+
# Deploy-time IC environment variables (applied at IC creation via SDK v3, not baked into image)
|
|
216
|
+
# Max 16 vars, max 1024 chars per key/value
|
|
217
|
+
# WARNING: Do not store raw secrets here. Use Secrets Manager ARN pattern instead:
|
|
218
|
+
# export IC_ENV_HF_TOKEN_ARN=arn:aws:secretsmanager:REGION:ACCOUNT:secret:NAME
|
|
219
|
+
# Runtime code resolves the ARN to the secret value (same pattern as HF_TOKEN_ARN in do/build).
|
|
220
|
+
<% Object.entries(icEnvVars).forEach(([key, value]) => { %>
|
|
221
|
+
export IC_ENV_<%= key %>=${IC_ENV_<%= key %>:-<%= value %>}
|
|
222
|
+
<% }); %>
|
|
223
|
+
<% } else if (deploymentTarget === 'realtime-inference') { %>
|
|
224
|
+
# ─── Deploy-time IC environment variables (uncomment to configure) ─────────────
|
|
225
|
+
# These are passed as the Environment field in InferenceComponent.create() at deploy time.
|
|
226
|
+
# They do NOT affect the Docker build — build-time env vars remain in Dockerfile via ENV.
|
|
227
|
+
# Max 16 vars, max 1024 chars per key/value.
|
|
228
|
+
# WARNING: Do not store raw secrets here. Use Secrets Manager ARN pattern instead:
|
|
229
|
+
# export IC_ENV_HF_TOKEN_ARN=arn:aws:secretsmanager:REGION:ACCOUNT:secret:NAME
|
|
230
|
+
# export IC_ENV_VLLM_MAX_MODEL_LEN=8192
|
|
231
|
+
# export IC_ENV_VLLM_GPU_MEMORY_UTILIZATION=0.85
|
|
232
|
+
<% } %>
|
|
233
|
+
|
|
214
234
|
# Framework-specific configuration
|
|
215
235
|
<% if (framework === 'transformers') { %>
|
|
216
236
|
export MODEL_NAME="<%= modelName %>"
|
|
@@ -6,6 +6,50 @@
|
|
|
6
6
|
# PROJECT_NAME, ENDPOINT_NAME, ECR_REPOSITORY, AWS_REGION, CONTAINER_ENV_JSON
|
|
7
7
|
# Also expects _update_config_var() to be available (from wait.sh).
|
|
8
8
|
|
|
9
|
+
# _ic_json_escape <string>
#   Prints <string> escaped for embedding between double quotes in a JSON
#   document: backslash and double quote are backslash-escaped, and control
#   characters are emitted as \t, \r, \b, \f or \u00XX.
#   A character loop is used instead of ${var//pat/rep} so the result does not
#   depend on how a given bash version treats backslashes in replacements.
_ic_json_escape() {
    local raw="$1" out="" ch i
    for ((i = 0; i < ${#raw}; i++)); do
        ch="${raw:i:1}"
        case "${ch}" in
            '\') out+='\\' ;;
            '"') out+='\"' ;;
            $'\t') out+='\t' ;;
            $'\r') out+='\r' ;;
            $'\b') out+='\b' ;;
            $'\f') out+='\f' ;;
            [[:cntrl:]]) out+=$(printf '\\u%04x' "'${ch}") ;;
            *) out+="${ch}" ;;
        esac
    done
    printf '%s' "${out}"
}

# _collect_ic_env_vars()
#   Reads exported IC_ENV_* variables from the environment (sourced from
#   do/config), strips the IC_ENV_ prefix, validates constraints, and builds a
#   comma-separated list of JSON "key":"value" pairs (no surrounding braces).
#
#   Constraints: max 16 entries, max 1024 chars per key/value (measured on the
#   raw, unescaped text). Entries that violate a length limit, or whose name is
#   exactly "IC_ENV_" (empty key), are skipped with a warning on stderr.
#   Entries are processed in `sort` order of the "NAME=value" lines; once 16
#   have been accepted, the remainder are dropped with a warning.
#
#   IC_ENV_* overrides take precedence over CONTAINER_ENV_JSON.
#
#   Arguments: none
#   Outputs:   warnings on stderr only
#   Sets:      IC_ENV_OVERRIDE in the caller's scope ("" when nothing matched)
_collect_ic_env_vars() {
    IC_ENV_OVERRIDE=""
    local ic_env_count=0
    # Keep every working variable local so the caller's own variables
    # (e.g. one named "value") are never clobbered.
    local line full_key stripped_key value esc_key esc_value

    while IFS= read -r line; do
        # Skip empty lines and anything that is not NAME=value
        # (e.g. a continuation line of some multi-line variable).
        case "${line}" in
            *=*) ;;
            *) continue ;;
        esac

        # Split on the FIRST '=' only. Using IFS='=' with read would strip a
        # single trailing '=' from the value (e.g. base64 padding).
        full_key="${line%%=*}"
        value="${line#*=}"
        stripped_key="${full_key#IC_ENV_}"

        if [ -z "${stripped_key}" ]; then
            echo "⚠️  IC_ENV_: empty variable name after prefix, skipping" >&2
            continue
        fi

        # Validate key length (AC-3.4)
        if [ "${#stripped_key}" -gt 1024 ]; then
            echo "⚠️  IC_ENV_${stripped_key}: key exceeds 1024 chars, skipping" >&2
            continue
        fi

        # Validate value length (AC-3.4)
        if [ "${#value}" -gt 1024 ]; then
            echo "⚠️  IC_ENV_${stripped_key}: value exceeds 1024 chars, skipping" >&2
            continue
        fi

        ic_env_count=$((ic_env_count + 1))

        # Max 16 env vars (AC-3.3)
        if [ "${ic_env_count}" -gt 16 ]; then
            echo "⚠️  More than 16 IC_ENV_* variables defined. Using first 16 only." >&2
            break
        fi

        # Escape both sides so quotes/backslashes cannot break the JSON that
        # the caller assembles around this fragment.
        esc_key=$(_ic_json_escape "${stripped_key}")
        esc_value=$(_ic_json_escape "${value}")

        if [ -n "${IC_ENV_OVERRIDE}" ]; then
            IC_ENV_OVERRIDE="${IC_ENV_OVERRIDE},"
        fi
        IC_ENV_OVERRIDE="${IC_ENV_OVERRIDE}\"${esc_key}\":\"${esc_value}\""
    done < <(env | grep "^IC_ENV_" | sort)
}
|
|
52
|
+
|
|
9
53
|
# create_inference_component <ic_config_file>
|
|
10
54
|
# Creates an inference component from a per-IC config file.
|
|
11
55
|
#
|
|
@@ -17,6 +61,10 @@
|
|
|
17
61
|
# IC_STARTUP_TIMEOUT — container startup health check timeout in seconds (default: 900)
|
|
18
62
|
# IC_CONTAINER_ENV_EXTRA — optional extra env vars in "KEY":"value" format
|
|
19
63
|
#
|
|
64
|
+
# IC_ENV_* prefixed vars from do/config are collected, validated, and passed
|
|
65
|
+
# as the Environment field in InferenceComponent.create() via SDK v3.
|
|
66
|
+
# Precedence: IC_ENV_* > IC_CONTAINER_ENV_EXTRA > CONTAINER_ENV_JSON
|
|
67
|
+
#
|
|
20
68
|
# Multi-spec support (for heterogeneous instance pools):
|
|
21
69
|
# IC_MULTI_SPEC — set to "true" to use Specifications (plural) array
|
|
22
70
|
# IC_SPEC_COUNT — number of spec entries (e.g., 2)
|
|
@@ -38,6 +86,9 @@ create_inference_component() {
|
|
|
38
86
|
# Source the IC config to get per-IC settings
|
|
39
87
|
source "${ic_conf}"
|
|
40
88
|
|
|
89
|
+
# Collect IC_ENV_* overrides from environment (sourced from do/config)
|
|
90
|
+
_collect_ic_env_vars
|
|
91
|
+
|
|
41
92
|
local ic_timestamp
|
|
42
93
|
ic_timestamp=$(date +%s)
|
|
43
94
|
local ic_basename
|
|
@@ -48,9 +99,11 @@ create_inference_component() {
|
|
|
48
99
|
local container_spec="{\"Image\":\"${ECR_REPOSITORY}:${IC_IMAGE_TAG:-${PROJECT_NAME}-latest}\""
|
|
49
100
|
# Always inject IC name for CW log forwarder
|
|
50
101
|
local ic_env="\"INFERENCE_COMPONENT_NAME\":\"${ic_name}\""
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
102
|
+
# Build environment JSON with precedence: IC_ENV_* > IC_CONTAINER_ENV_EXTRA > CONTAINER_ENV_JSON
|
|
103
|
+
local env_json="${CONTAINER_ENV_JSON}"
|
|
104
|
+
[ -n "${IC_CONTAINER_ENV_EXTRA:-}" ] && env_json="${env_json:+${env_json},}${IC_CONTAINER_ENV_EXTRA}"
|
|
105
|
+
[ -n "${IC_ENV_OVERRIDE:-}" ] && env_json="${env_json:+${env_json},}${IC_ENV_OVERRIDE}"
|
|
106
|
+
if [ -n "${env_json}" ]; then
|
|
54
107
|
container_spec="${container_spec},\"Environment\":{${ic_env},${env_json}}"
|
|
55
108
|
else
|
|
56
109
|
container_spec="${container_spec},\"Environment\":{${ic_env}}"
|