@aws/ml-container-creator 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +5 -2
- package/infra/ci-harness/buildspec.yml +60 -0
- package/package.json +1 -1
- package/servers/README.md +41 -1
- package/servers/instance-sizer/index.js +6 -0
- package/src/app.js +33 -2
- package/src/lib/config-manager.js +40 -1
- package/src/lib/deployment-entry-schema.js +16 -0
- package/src/lib/prompt-runner.js +174 -3
- package/src/lib/prompts.js +222 -2
- package/src/lib/registry-command-handler.js +12 -0
- package/templates/Dockerfile +12 -0
- package/templates/code/serving.properties +14 -0
- package/templates/do/adapter +1214 -0
- package/templates/do/adapters/.gitkeep +2 -0
- package/templates/do/add-ic +130 -0
- package/templates/do/benchmark +81 -9
- package/templates/do/clean +507 -17
- package/templates/do/config +23 -1
- package/templates/do/deploy +513 -367
- package/templates/do/ic/default.conf +32 -0
- package/templates/do/lib/endpoint-config.sh +216 -0
- package/templates/do/lib/inference-component.sh +167 -0
- package/templates/do/lib/secrets.sh +44 -0
- package/templates/do/lib/wait.sh +131 -0
- package/templates/do/logs +107 -27
- package/templates/do/optimize +528 -0
- package/templates/do/register +111 -1
- package/templates/do/status +337 -0
- package/templates/do/test +80 -28
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
#!/bin/bash
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Add a new inference component to this project.
# Creates a new IC config file in do/ic/ and deploys it immediately.
#
# Usage: ./do/add-ic
#   Interactive: every answer is read from stdin. An empty answer selects the
#   default shown in [brackets]. Invalid answers are re-prompted; the script
#   exits non-zero if stdin reaches EOF before a valid answer is given.
#
# Reads:  PROJECT_NAME (from do/config)
# Writes: do/ic/<name>.conf
# Runs:   do/deploy --ic <name> (via exec, so this script's exit status is
#         the deploy script's exit status)

set -e
set -u
set -o pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/config"

# The generated conf file is sourced by other scripts, so every value written
# into it must be free of shell metacharacters. These patterns are the
# allow-lists used for that purpose.
readonly IC_NAME_PATTERN='^[a-z0-9]([a-z0-9-]*[a-z0-9])?$'
# Docker/ECR tag grammar: word character first, then word characters, '.' or
# '-', at most 128 characters in total.
readonly IMAGE_TAG_PATTERN='^[A-Za-z0-9_][A-Za-z0-9_.-]{0,127}$'

#######################################
# Prompt for a whole number until a valid one is entered.
# Arguments:
#   $1 - name of the global variable that receives the value
#   $2 - prompt label, e.g. "GPU count"
#   $3 - default used when the reply is empty
#   $4 - subject of the error message, e.g. "GPU count"
# Outputs:
#   Prompt and validation errors on the terminal.
# Returns:
#   0 once a valid value is stored. Under `set -e` the script exits if
#   `read` fails (EOF on stdin).
#######################################
_prompt_count() {
  local var_name="$1"
  local label="$2"
  local default_value="$3"
  local subject="$4"
  local reply

  while true; do
    read -r -p "${label} [${default_value}]: " reply
    reply="${reply:-${default_value}}"

    if [[ "${reply}" =~ ^[0-9]+$ ]]; then
      # Drop leading zeros so the stored value can never be mistaken for an
      # octal literal (e.g. "08") if it is later used in shell arithmetic.
      reply="${reply#"${reply%%[!0]*}"}"
      printf -v "${var_name}" '%s' "${reply:-0}"
      return 0
    fi

    echo " ❌ ${subject} must be a non-negative integer."
  done
}

echo "➕ Add New Inference Component"
echo " Project: ${PROJECT_NAME}"
echo ""

# ============================================================
# Prompt for IC name
# ============================================================
while true; do
  read -r -p "IC name (lowercase alphanumeric + hyphens): " IC_NAME

  # Validate: non-empty
  if [[ -z "${IC_NAME}" ]]; then
    echo " ❌ IC name cannot be empty."
    continue
  fi

  # Validate: lowercase alphanumeric + hyphens only
  if [[ ! "${IC_NAME}" =~ ${IC_NAME_PATTERN} ]]; then
    echo " ❌ IC name must be lowercase alphanumeric with hyphens (e.g., 'llama-70b')."
    echo " Must start and end with a letter or number."
    continue
  fi

  # Validate: no collision with existing config
  if [[ -f "${SCRIPT_DIR}/ic/${IC_NAME}.conf" ]]; then
    echo " ❌ IC config already exists: do/ic/${IC_NAME}.conf"
    echo " Choose a different name or edit the existing config."
    continue
  fi

  break
done

# ============================================================
# Prompt for image tag
# ============================================================
DEFAULT_IMAGE_TAG="${PROJECT_NAME}-latest"
while true; do
  read -r -p "Image tag [${DEFAULT_IMAGE_TAG}]: " IC_IMAGE_TAG
  IC_IMAGE_TAG="${IC_IMAGE_TAG:-${DEFAULT_IMAGE_TAG}}"

  # The tag is interpolated into a file that gets sourced, so quotes, '$' or
  # backticks must never reach it.
  if [[ "${IC_IMAGE_TAG}" =~ ${IMAGE_TAG_PATTERN} ]]; then
    break
  fi

  echo " ❌ Image tag may only contain letters, digits, '_', '.' and '-' (max 128 characters)."
  echo " Must start with a letter, digit or underscore."
done

# ============================================================
# Prompt for GPU count, copy count and memory MB
# ============================================================
_prompt_count IC_GPU_COUNT "GPU count" 1 "GPU count"
_prompt_count IC_COPY_COUNT "Copy count" 1 "Copy count"
_prompt_count IC_MIN_MEMORY_MB "Min memory MB" 1024 "Memory MB"

# ============================================================
# Create IC config file
# ============================================================
IC_CONF_PATH="${SCRIPT_DIR}/ic/${IC_NAME}.conf"
mkdir -p "${SCRIPT_DIR}/ic"

# Unquoted delimiter on purpose: the values collected above and the creation
# timestamp are expanded now, while the file is being written.
cat > "${IC_CONF_PATH}" <<EOF
# Per-IC configuration: ${IC_NAME}
# Created by do/add-ic on $(date -u +"%Y-%m-%dT%H:%M:%SZ")
#
# This file is sourced by do/lib/inference-component.sh during deployment.
# After deployment, IC_DEPLOYED_NAME and IC_DEPLOYED_AT will be appended
# by the deploy script to track the active inference component.

export IC_IMAGE_TAG="${IC_IMAGE_TAG}"
export IC_GPU_COUNT=${IC_GPU_COUNT}
export IC_COPY_COUNT=${IC_COPY_COUNT}
export IC_MIN_MEMORY_MB=${IC_MIN_MEMORY_MB}
export IC_STARTUP_TIMEOUT=900

# Optional overrides:
# export IC_MODEL_NAME="my-model-v2"
# export IC_CONTAINER_ENV_EXTRA='"KEY":"value"'

EOF

echo ""
echo "✅ Created IC config: do/ic/${IC_NAME}.conf"
echo " Image tag: ${IC_IMAGE_TAG}"
echo " GPU count: ${IC_GPU_COUNT}"
echo " Copy count: ${IC_COPY_COUNT}"
echo " Memory MB: ${IC_MIN_MEMORY_MB}"
echo ""

# ============================================================
# Deploy the new IC immediately
# ============================================================
echo "🚀 Deploying IC '${IC_NAME}'..."
echo ""
# exec replaces this process, so nothing after this line would run.
exec "${SCRIPT_DIR}/deploy" --ic "${IC_NAME}"
|
package/templates/do/benchmark
CHANGED
|
@@ -17,18 +17,29 @@ source "${SCRIPT_DIR}/config"
|
|
|
17
17
|
# ── Parse flags ───────────────────────────────────────────────────────────────
|
|
18
18
|
CLEAN_AFTER=false
|
|
19
19
|
FORCE=false
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
20
|
+
IC_ARG=""
|
|
21
|
+
ADAPTER_ARG=""
|
|
22
|
+
while [ $# -gt 0 ]; do
|
|
23
|
+
case "$1" in
|
|
24
|
+
--clean) CLEAN_AFTER=true; shift ;;
|
|
25
|
+
--force) FORCE=true; shift ;;
|
|
26
|
+
--ic) shift; IC_ARG="${1:-}"; shift ;;
|
|
27
|
+
--adapter) shift; ADAPTER_ARG="${1:-}"; shift ;;
|
|
24
28
|
--help|-h)
|
|
25
|
-
echo "Usage: ./do/benchmark [--force] [--clean]"
|
|
29
|
+
echo "Usage: ./do/benchmark [--ic <name>] [--adapter <name>] [--force] [--clean]"
|
|
26
30
|
echo ""
|
|
27
31
|
echo "Run SageMaker AI Benchmark against the deployed endpoint."
|
|
28
32
|
echo ""
|
|
29
33
|
echo "Options:"
|
|
30
|
-
echo " --
|
|
31
|
-
echo " --
|
|
34
|
+
echo " --ic <name> Benchmark a specific inference component"
|
|
35
|
+
echo " --adapter <name> Benchmark a specific LoRA adapter IC"
|
|
36
|
+
echo " --force Create a new benchmark job even if one is already running"
|
|
37
|
+
echo " --clean Delete workload config and benchmark job after displaying results"
|
|
38
|
+
echo ""
|
|
39
|
+
echo "IC resolution:"
|
|
40
|
+
echo " --adapter <name> Use ADAPTER_IC_NAME from do/adapters/<name>.conf"
|
|
41
|
+
echo " --ic <name> Use IC_DEPLOYED_NAME from do/ic/<name>.conf"
|
|
42
|
+
echo " (no flag) Use first IC in do/ic/ alphabetically, or legacy config"
|
|
32
43
|
echo ""
|
|
33
44
|
echo "Idempotency:"
|
|
34
45
|
echo " If a benchmark job is already in progress, re-running without --force"
|
|
@@ -39,6 +50,7 @@ for arg in "$@"; do
|
|
|
39
50
|
echo " • AWS credentials must be configured"
|
|
40
51
|
exit 0
|
|
41
52
|
;;
|
|
53
|
+
*) shift ;;
|
|
42
54
|
esac
|
|
43
55
|
done
|
|
44
56
|
|
|
@@ -52,6 +64,66 @@ if ! aws --version 2>&1 | grep -q "aws-cli/2"; then
|
|
|
52
64
|
exit 1
|
|
53
65
|
fi
|
|
54
66
|
|
|
67
|
+
# ── Resolve inference component name ──────────────────────────────────────────
# Resolution precedence: --adapter <name>, --ic <name>, first in do/ic/, or legacy config
#
# Reads:  SCRIPT_DIR, ADAPTER_ARG, IC_ARG, INFERENCE_COMPONENT_NAME (optional)
# Writes: IC_NAME (may stay empty only on the legacy path)
# Note:   conf files are sourced into the current shell, so every variable
#         they define becomes visible to the rest of the script.

# Print one bullet per adapter conf found in do/adapters/, or "(none)" when
# there is no conf at all. This also covers an adapters directory that exists
# but holds no *.conf file, as well as a missing directory.
_list_available_adapters() {
  local adapter_conf
  local found=false
  for adapter_conf in "${SCRIPT_DIR}"/adapters/*.conf; do
    # An unmatched glob stays literal; skip it.
    [ -f "${adapter_conf}" ] || continue
    echo " • $(basename "${adapter_conf}" .conf)"
    found=true
  done
  if [ "${found}" = false ]; then
    echo " (none)"
  fi
}

IC_NAME=""
if [ -n "${ADAPTER_ARG}" ]; then
  # Adapter name provided via --adapter flag — look up adapter IC
  ADAPTER_CONF="${SCRIPT_DIR}/adapters/${ADAPTER_ARG}.conf"
  if [ ! -f "${ADAPTER_CONF}" ]; then
    echo "❌ Adapter config not found: do/adapters/${ADAPTER_ARG}.conf"
    echo " Available adapters:"
    _list_available_adapters
    exit 1
  fi
  # Reset first so a value inherited from the environment cannot mask a conf
  # that does not define ADAPTER_IC_NAME.
  ADAPTER_IC_NAME=""
  source "${ADAPTER_CONF}"
  if [ -z "${ADAPTER_IC_NAME}" ]; then
    echo "❌ Adapter '${ADAPTER_ARG}' conf is missing ADAPTER_IC_NAME."
    exit 1
  fi
  IC_NAME="${ADAPTER_IC_NAME}"
elif [ -n "${IC_ARG}" ]; then
  # Explicit IC name provided via --ic flag
  IC_CONF="${SCRIPT_DIR}/ic/${IC_ARG}.conf"
  if [ ! -f "${IC_CONF}" ]; then
    echo "❌ IC config not found: do/ic/${IC_ARG}.conf"
    exit 1
  fi
  # Reset first: an empty value after sourcing means the conf has no
  # IC_DEPLOYED_NAME, which is reported below as "not deployed yet".
  IC_DEPLOYED_NAME=""
  source "${IC_CONF}"
  if [ -z "${IC_DEPLOYED_NAME}" ]; then
    echo "❌ IC '${IC_ARG}' has not been deployed yet. Run ./do/deploy --ic ${IC_ARG} first."
    exit 1
  fi
  IC_NAME="${IC_DEPLOYED_NAME}"
elif [ -d "${SCRIPT_DIR}/ic" ]; then
  # No --ic argument, but do/ic/ exists — use the first conf (in glob order)
  # that defines a non-empty IC_DEPLOYED_NAME.
  for conf in "${SCRIPT_DIR}"/ic/*.conf; do
    [ -f "${conf}" ] || continue
    # Reset per file so a name from a previous conf cannot leak into this one.
    IC_DEPLOYED_NAME=""
    source "${conf}"
    if [ -n "${IC_DEPLOYED_NAME}" ]; then
      IC_NAME="${IC_DEPLOYED_NAME}"
      break
    fi
  done
  if [ -z "${IC_NAME}" ]; then
    echo "❌ No ICs deployed. Run ./do/deploy first."
    exit 1
  fi
else
  # Legacy: no do/ic/ directory, use INFERENCE_COMPONENT_NAME from do/config
  IC_NAME="${INFERENCE_COMPONENT_NAME:-}"
fi
|
|
126
|
+
|
|
55
127
|
# ── Helper: update a variable in do/config ────────────────────────────────────
|
|
56
128
|
_update_benchmark_var() {
|
|
57
129
|
local var_name="$1"
|
|
@@ -123,7 +195,7 @@ MAX_POLL_ATTEMPTS=60 # 30 minutes max (60 * 30s)
|
|
|
123
195
|
echo "📊 SageMaker AI Benchmark"
|
|
124
196
|
echo " Project: ${PROJECT_NAME}"
|
|
125
197
|
echo " Endpoint: ${ENDPOINT_NAME:-not set}"
|
|
126
|
-
echo " Inference Component: ${
|
|
198
|
+
echo " Inference Component: ${IC_NAME:-not set}"
|
|
127
199
|
echo " Concurrency: ${BENCHMARK_CONCURRENCY}"
|
|
128
200
|
echo " Input tokens (mean): ${BENCHMARK_INPUT_TOKENS_MEAN}"
|
|
129
201
|
echo " Output tokens (mean): ${BENCHMARK_OUTPUT_TOKENS_MEAN}"
|
|
@@ -318,7 +390,7 @@ echo ""
|
|
|
318
390
|
# Target the deployed endpoint and inference component with the workload config.
|
|
319
391
|
echo "🚀 Step 2: Creating AI Benchmark Job: ${BENCHMARK_JOB_NAME}"
|
|
320
392
|
|
|
321
|
-
BENCHMARK_TARGET="{\"Endpoint\":{\"Identifier\":\"${ENDPOINT_NAME}\",\"InferenceComponents\":[{\"Identifier\":\"${
|
|
393
|
+
BENCHMARK_TARGET="{\"Endpoint\":{\"Identifier\":\"${ENDPOINT_NAME}\",\"InferenceComponents\":[{\"Identifier\":\"${IC_NAME}\"}]}}"
|
|
322
394
|
OUTPUT_CONFIG="{\"S3OutputLocation\":\"${BENCHMARK_S3_OUTPUT_PATH}\"}"
|
|
323
395
|
|
|
324
396
|
if ! aws sagemaker create-ai-benchmark-job \
|