@aws/ml-container-creator 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,32 @@
1
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Per-IC configuration: default
5
+ # This file defines the primary inference component for the project.
6
+ # It is sourced by do/lib/inference-component.sh during deployment.
7
+ #
8
+ # After deployment, IC_DEPLOYED_NAME and IC_DEPLOYED_AT will be appended
9
+ # by the deploy script to track the active inference component.
10
+
11
+ export IC_IMAGE_TAG="<%= projectName %>-latest"
12
+ export IC_GPU_COUNT=<%= (typeof icGpuCount !== 'undefined' && icGpuCount != null) ? icGpuCount : 1 %>
13
+ export IC_COPY_COUNT=1
14
+ export IC_MIN_MEMORY_MB=1024
15
+ export IC_STARTUP_TIMEOUT=900
16
+ <% if (typeof instancePoolSpecs !== 'undefined' && instancePoolSpecs && instancePoolSpecs.length > 1) { %>
17
+
18
+ # Multi-spec IC configuration (auto-generated from instance pool selections)
19
+ # When the endpoint uses instance pools, the IC uses Specifications (plural)
20
+ # with per-instance-type compute resource requirements.
21
+ export IC_MULTI_SPEC=true
22
+ export IC_SPEC_COUNT=<%= instancePoolSpecs.length %>
23
+ <% instancePoolSpecs.forEach(function(spec, idx) { %>
24
+ export IC_SPEC_<%= idx + 1 %>_INSTANCE_TYPE="<%= spec.instanceType %>"
25
+ export IC_SPEC_<%= idx + 1 %>_GPU_COUNT=<%= spec.gpuCount %>
26
+ export IC_SPEC_<%= idx + 1 %>_MIN_MEMORY_MB=<%= spec.minMemoryMb %>
27
+ <% }); %>
28
+ <% } %>
29
+
30
+ # Optional overrides:
31
+ # export IC_MODEL_NAME="my-model-v2"
32
+ # export IC_CONTAINER_ENV_EXTRA='"KEY":"value"'
@@ -0,0 +1,216 @@
1
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Shared helper: create SageMaker endpoint configuration.
5
+ # Sourced by do/deploy — expects PROJECT_NAME, AWS_REGION to be set by the caller.
6
+ # One of INSTANCE_TYPE or INSTANCE_POOLS must be set (mutually exclusive).
7
+ # Optional: ROLE_ARN, INFERENCE_AMI_VERSION, CAPACITY_RESERVATION_ARN, ASYNC_INFERENCE_CONFIG,
8
+ # POOL_TIMEOUT (default: 1200), POOL_INSTANCE_COUNT (default: 1), MODEL_NAME_SM.
9
+
10
+ # _validate_instance_pools()
11
+ # Validates that all instance types in INSTANCE_POOLS are compatible:
12
+ # - All types must share the same accelerator generation (same CUDA/AMI requirements)
13
+ # - Cannot mix CUDA and Neuron accelerator types
14
+ # - Unknown instance types produce a warning but do not block deployment
15
+ #
16
+ # Uses a hardcoded map of instance type prefixes to their generation/AMI compatibility:
17
+ # cuda-11 (AMI 2-x): g4dn, g5, g5g, p3, p4d, p4de
18
+ # cuda-12 (AMI 3-x): g6, g6e, p5
19
+ # cuda-next (AMI 4-x): p6, g7e
20
+ # neuron: inf1, inf2, trn1
21
+ #
22
+ # Exits with error if incompatible types are detected.
23
+ _validate_instance_pools() {
24
+ # Map instance family prefixes to their generation
25
+ # Format: "family_prefix=generation"
26
+ local -a GENERATION_MAP=(
27
+ "ml.g4dn.=cuda-11"
28
+ "ml.g5.=cuda-11"
29
+ "ml.g5g.=cuda-11"
30
+ "ml.p3.=cuda-11"
31
+ "ml.p4d.=cuda-11"
32
+ "ml.p4de.=cuda-11"
33
+ "ml.g6.=cuda-12"
34
+ "ml.g6e.=cuda-12"
35
+ "ml.p5.=cuda-12"
36
+ "ml.p5e.=cuda-12"
37
+ "ml.p5en.=cuda-12"
38
+ "ml.p6.=cuda-next"
39
+ "ml.g7e.=cuda-next"
40
+ "ml.inf1.=neuron"
41
+ "ml.inf2.=neuron"
42
+ "ml.trn1.=neuron"
43
+ )
44
+
45
+ # Extract instance types from INSTANCE_POOLS JSON
46
+ # INSTANCE_POOLS format: [{"InstanceType":"ml.g6e.48xlarge","Priority":1},...]
47
+ # Use simple string parsing to extract InstanceType values
48
+ local pool_types=""
49
+ pool_types=$(echo "${INSTANCE_POOLS}" | grep -oE '"InstanceType"\s*:\s*"[^"]+"' | sed 's/"InstanceType"\s*:\s*"//;s/"$//' || true)
50
+
51
+ if [ -z "${pool_types}" ]; then
52
+ return 0
53
+ fi
54
+
55
+ local first_generation=""
56
+ local first_type=""
57
+ local has_unknown=false
58
+
59
+ while IFS= read -r instance_type; do
60
+ [ -z "${instance_type}" ] && continue
61
+
62
+ local generation=""
63
+ for entry in "${GENERATION_MAP[@]}"; do
64
+ local prefix="${entry%%=*}"
65
+ local gen="${entry##*=}"
66
+ if [[ "${instance_type}" == ${prefix}* ]]; then
67
+ generation="${gen}"
68
+ break
69
+ fi
70
+ done
71
+
72
+ if [ -z "${generation}" ]; then
73
+ echo " ⚠️ Unknown instance type in pool: ${instance_type} — skipping validation for this type"
74
+ has_unknown=true
75
+ continue
76
+ fi
77
+
78
+ if [ -z "${first_generation}" ]; then
79
+ first_generation="${generation}"
80
+ first_type="${instance_type}"
81
+ elif [ "${generation}" != "${first_generation}" ]; then
82
+ echo "❌ Cannot mix ${first_type} (${first_generation}) and ${instance_type} (${generation}) in same pool — different CUDA/AMI requirements"
83
+ echo " All instance types in a pool must share the same InferenceAmiVersion."
84
+ echo ""
85
+ echo " Generation groupings:"
86
+ echo " cuda-11 (AMI 2-x): ml.g4dn.*, ml.g5.*, ml.p3.*, ml.p4d.*"
87
+ echo " cuda-12 (AMI 3-x): ml.g6.*, ml.g6e.*, ml.p5.*"
88
+ echo " neuron: ml.inf1.*, ml.inf2.*, ml.trn1.*"
89
+ echo ""
90
+ echo " Fix: use instance types from the same generation in your pool."
91
+ exit 1
92
+ fi
93
+ done <<< "${pool_types}"
94
+ }
95
+
96
+ # create_endpoint_config()
97
+ # Builds a ProductionVariant JSON and calls `aws sagemaker create-endpoint-config`.
98
+ # Sets the global ENDPOINT_CONFIG_NAME variable for downstream use.
99
+ #
100
+ # Behavior:
101
+ # - INSTANCE_POOLS set: uses InstancePools array, RoutingConfig, VariantInstanceProvisionTimeoutInSeconds
102
+ # Omits InstanceType entirely (mutually exclusive with pools)
103
+ # - INSTANCE_POOLS not set: uses single INSTANCE_TYPE (standard path)
104
+ # - INFERENCE_AMI_VERSION: appended to variant when set
105
+ # - CAPACITY_RESERVATION_ARN: appended to variant when set (only for single instance type path)
106
+ # - ASYNC_INFERENCE_CONFIG: passes --async-inference-config when set
107
+ # - ROLE_ARN + no MODEL_NAME_SM: passes --execution-role-arn (IC-based real-time flow)
108
+ # - MODEL_NAME_SM set: omits --execution-role-arn (model-based async flow)
109
+ create_endpoint_config() {
110
+ # Mutual exclusivity: capacity reservations and instance pools cannot be used together.
111
+ # Capacity reservations guarantee a specific instance type, while pools are for fallback
112
+ # across multiple types. If both are set, prefer the reservation.
113
+ if [ -n "${INSTANCE_POOLS:-}" ] && [ -n "${CAPACITY_RESERVATION_ARN:-}" ]; then
114
+ echo "⚠️ Capacity reservations and instance pools are mutually exclusive. Using capacity reservation."
115
+ unset INSTANCE_POOLS
116
+ fi
117
+
118
+ local timestamp
119
+ timestamp=$(date +%s)
120
+ ENDPOINT_CONFIG_NAME="${PROJECT_NAME}-epc-${timestamp}"
121
+
122
+ local variant_json
123
+
124
+ if [ -n "${INSTANCE_POOLS:-}" ]; then
125
+ # Validate pool compatibility before proceeding
126
+ _validate_instance_pools
127
+
128
+ # Instance pools path: heterogeneous instance types with priority-based fallback
129
+ echo " Instance pools: enabled"
130
+
131
+ # Transform ModelName → ModelNameOverride for the SageMaker API.
132
+ # INSTANCE_POOLS config uses "ModelName" for readability; the API expects "ModelNameOverride"
133
+ # as a sibling of InstanceType and Priority within each pool entry.
134
+ local pools_json="${INSTANCE_POOLS}"
135
+ if echo "${pools_json}" | grep -q '"ModelName"'; then
136
+ pools_json=$(echo "${pools_json}" | sed 's/"ModelName"/"ModelNameOverride"/g')
137
+ echo " ModelNameOverride: per-pool model names detected"
138
+ fi
139
+
140
+ variant_json="[{\"VariantName\":\"AllTraffic\""
141
+ variant_json="${variant_json},\"InstancePools\":${pools_json}"
142
+ variant_json="${variant_json},\"InitialInstanceCount\":${POOL_INSTANCE_COUNT:-1}"
143
+ variant_json="${variant_json},\"VariantInstanceProvisionTimeoutInSeconds\":${POOL_TIMEOUT:-1200}"
144
+ variant_json="${variant_json},\"RoutingConfig\":{\"RoutingStrategy\":\"LEAST_OUTSTANDING_REQUESTS\"}"
145
+
146
+ # Optional: AMI version
147
+ if [ -n "${INFERENCE_AMI_VERSION:-}" ]; then
148
+ variant_json="${variant_json},\"InferenceAmiVersion\":\"${INFERENCE_AMI_VERSION}\""
149
+ echo " AMI version: ${INFERENCE_AMI_VERSION}"
150
+ fi
151
+
152
+ variant_json="${variant_json}}]"
153
+ else
154
+ # Standard path: single instance type
155
+ variant_json="[{\"VariantName\":\"AllTraffic\",\"InstanceType\":\"${INSTANCE_TYPE}\",\"InitialInstanceCount\":1"
156
+
157
+ # Optional: AMI version
158
+ if [ -n "${INFERENCE_AMI_VERSION:-}" ]; then
159
+ variant_json="${variant_json},\"InferenceAmiVersion\":\"${INFERENCE_AMI_VERSION}\""
160
+ echo " AMI version: ${INFERENCE_AMI_VERSION}"
161
+ fi
162
+
163
+ # Optional: capacity reservation
164
+ if [ -n "${CAPACITY_RESERVATION_ARN:-}" ]; then
165
+ variant_json="${variant_json},\"CapacityReservationConfig\":{\"CapacityReservationPreference\":\"capacity-reservations-only\",\"MlReservationArn\":\"${CAPACITY_RESERVATION_ARN}\"}"
166
+ echo " ⚠️ Capacity reservation (experimental): ${CAPACITY_RESERVATION_ARN}"
167
+ fi
168
+
169
+ variant_json="${variant_json}}]"
170
+ fi
171
+
172
+ # Build the AWS CLI command arguments
173
+ local -a cmd_args=(
174
+ aws sagemaker create-endpoint-config
175
+ --endpoint-config-name "${ENDPOINT_CONFIG_NAME}"
176
+ )
177
+
178
+ # Include --execution-role-arn for IC-based flow (real-time).
179
+ # Omit for model-based flow (async) where MODEL_NAME_SM is set.
180
+ if [ -n "${ROLE_ARN:-}" ] && [ -z "${MODEL_NAME_SM:-}" ]; then
181
+ cmd_args+=(--execution-role-arn "${ROLE_ARN}")
182
+ fi
183
+
184
+ cmd_args+=(--production-variants "${variant_json}")
185
+
186
+ # Optional: async inference config
187
+ if [ -n "${ASYNC_INFERENCE_CONFIG:-}" ]; then
188
+ cmd_args+=(--async-inference-config "${ASYNC_INFERENCE_CONFIG}")
189
+ fi
190
+
191
+ cmd_args+=(--region "${AWS_REGION}")
192
+
193
+ echo "⚙️ Creating endpoint configuration: ${ENDPOINT_CONFIG_NAME}"
194
+ if ! "${cmd_args[@]}"; then
195
+ echo "❌ Failed to create endpoint configuration"
196
+ echo " Check that:"
197
+ if [ -n "${ROLE_ARN:-}" ] && [ -z "${MODEL_NAME_SM:-}" ]; then
198
+ echo " • The execution role ARN is valid"
199
+ fi
200
+ if [ -n "${INSTANCE_POOLS:-}" ]; then
201
+ echo " • The instance pool types are valid and available in region: ${AWS_REGION}"
202
+ echo " • You have sufficient service quota for the pool instance types"
203
+ else
204
+ echo " • The instance type is valid: ${INSTANCE_TYPE}"
205
+ echo " • The instance type is available in region: ${AWS_REGION}"
206
+ echo " • You have sufficient service quota for the instance type"
207
+ fi
208
+ if [ -n "${ASYNC_INFERENCE_CONFIG:-}" ]; then
209
+ echo " • The async inference config is valid JSON"
210
+ echo " • The S3 output path and SNS topics are accessible"
211
+ fi
212
+ exit 4
213
+ fi
214
+
215
+ echo "✅ Endpoint configuration created: ${ENDPOINT_CONFIG_NAME}"
216
+ }
@@ -0,0 +1,167 @@
1
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Shared helper: create SageMaker inference components.
5
+ # Sourced by do/deploy — expects the following to be set by the caller:
6
+ # PROJECT_NAME, ENDPOINT_NAME, ECR_REPOSITORY, AWS_REGION, CONTAINER_ENV_JSON
7
+ # Also expects _update_config_var() to be available (from wait.sh).
8
+
9
+ # create_inference_component <ic_config_file>
10
+ # Creates an inference component from a per-IC config file.
11
+ #
12
+ # The config file is sourced and should export:
13
+ # IC_IMAGE_TAG — container image tag (default: ${PROJECT_NAME}-latest)
14
+ # IC_GPU_COUNT — number of accelerator devices (default: 1)
15
+ # IC_COPY_COUNT — number of IC copies (default: 1)
16
+ # IC_MIN_MEMORY_MB — minimum memory in MB (default: 1024)
17
+ # IC_STARTUP_TIMEOUT — container startup health check timeout in seconds (default: 900)
18
+ # IC_CONTAINER_ENV_EXTRA — optional extra env vars in "KEY":"value" format
19
+ #
20
+ # Multi-spec support (for heterogeneous instance pools):
21
+ # IC_MULTI_SPEC — set to "true" to use Specifications (plural) array
22
+ # IC_SPEC_COUNT — number of spec entries (e.g., 2)
23
+ # IC_SPEC_N_INSTANCE_TYPE — instance type for spec entry N
24
+ # IC_SPEC_N_GPU_COUNT — GPU count for spec entry N
25
+ # IC_SPEC_N_MIN_MEMORY_MB — minimum memory for spec entry N
26
+ #
27
+ # Sets IC_DEPLOYED_NAME in the caller's scope (for use by wait_ic).
28
+ # Persists IC_DEPLOYED_NAME and IC_DEPLOYED_AT back to the IC config file.
29
+ # Echoes the IC name as return value.
30
+ create_inference_component() {
31
+ local ic_conf="$1"
32
+
33
+ if [ ! -f "${ic_conf}" ]; then
34
+ echo "❌ IC config file not found: ${ic_conf}"
35
+ exit 4
36
+ fi
37
+
38
+ # Source the IC config to get per-IC settings
39
+ source "${ic_conf}"
40
+
41
+ local ic_timestamp
42
+ ic_timestamp=$(date +%s)
43
+ local ic_basename
44
+ ic_basename=$(basename "${ic_conf}" .conf)
45
+ local ic_name="${PROJECT_NAME}-${ic_basename}-${ic_timestamp}"
46
+
47
+ # Build container spec JSON
48
+ local container_spec="{\"Image\":\"${ECR_REPOSITORY}:${IC_IMAGE_TAG:-${PROJECT_NAME}-latest}\""
49
+ if [ -n "${CONTAINER_ENV_JSON}${IC_CONTAINER_ENV_EXTRA:-}" ]; then
50
+ local env_json="${CONTAINER_ENV_JSON}"
51
+ [ -n "${IC_CONTAINER_ENV_EXTRA:-}" ] && env_json="${env_json:+${env_json},}${IC_CONTAINER_ENV_EXTRA}"
52
+ container_spec="${container_spec},\"Environment\":{${env_json}}"
53
+ fi
54
+ container_spec="${container_spec}}"
55
+
56
+ # Build specification JSON — multi-spec (Specifications array) or single (Specification object)
57
+ local spec_json
58
+ if [ "${IC_MULTI_SPEC:-false}" = "true" ] && [ "${IC_SPEC_COUNT:-0}" -gt 0 ]; then
59
+ # Multi-spec: build Specifications array with per-instance-type compute resources
60
+ spec_json="{\"Specifications\":["
61
+ local i=1
62
+ while [ "${i}" -le "${IC_SPEC_COUNT}" ]; do
63
+ local spec_instance_type_var="IC_SPEC_${i}_INSTANCE_TYPE"
64
+ local spec_gpu_count_var="IC_SPEC_${i}_GPU_COUNT"
65
+ local spec_min_memory_var="IC_SPEC_${i}_MIN_MEMORY_MB"
66
+
67
+ local spec_instance_type="${!spec_instance_type_var}"
68
+ local spec_gpu_count="${!spec_gpu_count_var:-1}"
69
+ local spec_min_memory="${!spec_min_memory_var:-1024}"
70
+
71
+ if [ "${i}" -gt 1 ]; then
72
+ spec_json="${spec_json},"
73
+ fi
74
+ spec_json="${spec_json}{\"Container\":${container_spec},\"StartupParameters\":{\"ContainerStartupHealthCheckTimeoutInSeconds\":${IC_STARTUP_TIMEOUT:-900}},\"ComputeResourceRequirements\":{\"NumberOfAcceleratorDevicesRequired\":${spec_gpu_count},\"MinMemoryRequiredInMb\":${spec_min_memory}}}"
75
+
76
+ i=$((i + 1))
77
+ done
78
+ spec_json="${spec_json}]}"
79
+ else
80
+ # Single spec: standard Specification object (existing behavior)
81
+ spec_json="{\"Container\":${container_spec},\"StartupParameters\":{\"ContainerStartupHealthCheckTimeoutInSeconds\":${IC_STARTUP_TIMEOUT:-900}},\"ComputeResourceRequirements\":{\"NumberOfAcceleratorDevicesRequired\":${IC_GPU_COUNT:-1},\"MinMemoryRequiredInMb\":${IC_MIN_MEMORY_MB:-1024}}}"
82
+ fi
83
+
84
+ echo "📦 Creating inference component: ${ic_name}"
85
+ if ! aws sagemaker create-inference-component \
86
+ --inference-component-name "${ic_name}" \
87
+ --endpoint-name "${ENDPOINT_NAME}" \
88
+ --variant-name "AllTraffic" \
89
+ --specification "${spec_json}" \
90
+ --runtime-config "{\"CopyCount\": ${IC_COPY_COUNT:-1}}" \
91
+ --region "${AWS_REGION}"; then
92
+
93
+ echo "❌ Failed to create inference component: ${ic_name}"
94
+ echo " Check that:"
95
+ echo " • The endpoint is InService: ${ENDPOINT_NAME}"
96
+ echo " • The container image exists: ${ECR_REPOSITORY}:${IC_IMAGE_TAG:-${PROJECT_NAME}-latest}"
97
+ echo " • GPU count (${IC_GPU_COUNT:-1}) does not exceed instance capacity"
98
+ echo " • You have sufficient permissions for sagemaker:CreateInferenceComponent"
99
+ exit 4
100
+ fi
101
+
102
+ # Persist deployed name and timestamp back to IC config
103
+ IC_DEPLOYED_NAME="${ic_name}"
104
+ IC_DEPLOYED_AT="${ic_timestamp}"
105
+ _update_config_var "IC_DEPLOYED_NAME" "${ic_name}" "${ic_conf}"
106
+ _update_config_var "IC_DEPLOYED_AT" "${ic_timestamp}" "${ic_conf}"
107
+
108
+ echo "✅ Inference component created: ${ic_name}"
109
+ echo "${ic_name}"
110
+ }
111
+
112
# create_inference_component_legacy()
# Backward-compatible IC creation for projects without do/ic/ directory.
# Reads IC_GPU_COUNT from do/config (already sourced) and IMAGE_TAG from caller scope.
# Uses the same endpoint and container env as the multi-IC path.
#
# Sets IC_DEPLOYED_NAME in the caller's scope (for use by wait_ic).
# Persists INFERENCE_COMPONENT_NAME to do/config.
create_inference_component_legacy() {
  local created_at
  created_at=$(date +%s)
  # Timestamp suffix keeps IC names unique across redeploys.
  local component_name="${PROJECT_NAME}-ic-${created_at}"

  # Assemble the container spec JSON (IMAGE_TAG comes from the caller's scope).
  local container_json="{\"Image\":\"${ECR_REPOSITORY}:${IMAGE_TAG}\""
  if [ -n "${CONTAINER_ENV_JSON}" ]; then
    container_json+=",\"Environment\":{${CONTAINER_ENV_JSON}}"
  fi
  container_json+="}"

  echo "📦 Creating inference component: ${component_name}"
  if ! aws sagemaker create-inference-component \
    --inference-component-name "${component_name}" \
    --endpoint-name "${ENDPOINT_NAME}" \
    --variant-name "AllTraffic" \
    --specification "{
    \"Container\": ${container_json},
    \"StartupParameters\": {
    \"ContainerStartupHealthCheckTimeoutInSeconds\": 900
    },
    \"ComputeResourceRequirements\": {
    \"NumberOfAcceleratorDevicesRequired\": ${IC_GPU_COUNT:-1},
    \"MinMemoryRequiredInMb\": 1024
    }
    }" \
    --runtime-config "{\"CopyCount\": 1}" \
    --region "${AWS_REGION}"; then

    echo "❌ Failed to create inference component: ${component_name}"
    echo " Check that:"
    echo " • The endpoint is InService: ${ENDPOINT_NAME}"
    echo " • The container image exists: ${ECR_REPOSITORY}:${IMAGE_TAG}"
    echo " • GPU count (${IC_GPU_COUNT:-1}) does not exceed instance capacity"
    echo " • You have sufficient permissions for sagemaker:CreateInferenceComponent"
    exit 4
  fi

  # Make the new name visible to wait_ic in the caller's scope.
  IC_DEPLOYED_NAME="${component_name}"
  IC_DEPLOYED_AT="${created_at}"

  # Persist to do/config for legacy compatibility.
  _update_config_var "INFERENCE_COMPONENT_NAME" "${component_name}"
  _update_config_var "IC_DEPLOYED_AT" "${created_at}"

  echo "✅ Inference component created: ${component_name}"
}
@@ -0,0 +1,44 @@
1
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Shared helper: resolve container secrets from Secrets Manager or direct values.
5
+ # Sourced by do/deploy — expects AWS_REGION to be set by the caller.
6
+
7
# _json_escape <value>
# Escape backslashes and double quotes so <value> can be embedded safely
# inside a JSON string literal. Prints the escaped value on stdout.
_json_escape() {
  local v="$1"
  v=${v//\\/\\\\}
  v=${v//\"/\\\"}
  printf '%s' "${v}"
}

# _append_container_env <key> <value>
# Append a "key":"value" pair to CONTAINER_ENV_JSON, comma-separated,
# JSON-escaping the value (secrets may contain quotes or backslashes).
_append_container_env() {
  local pair="\"$1\":\"$(_json_escape "$2")\""
  CONTAINER_ENV_JSON="${CONTAINER_ENV_JSON:+${CONTAINER_ENV_JSON},}${pair}"
}

# resolve_secrets()
# Resolves HF_TOKEN and NGC_API_KEY from either:
#   - AWS Secrets Manager (when *_ARN variables are set)
#   - Direct values (when the plain variables are set)
# Sets the global CONTAINER_ENV_JSON variable with comma-separated "KEY":"value" pairs.
resolve_secrets() {
  CONTAINER_ENV_JSON=""

  # Secrets Manager takes precedence over a directly supplied token.
  if [ -n "${HF_TOKEN_ARN:-}" ]; then
    echo "🔐 Resolving HuggingFace token from Secrets Manager..."
    RESOLVED_HF_TOKEN=$(aws secretsmanager get-secret-value --secret-id "${HF_TOKEN_ARN}" --query SecretString --output text --region "${AWS_REGION}") || {
      echo "❌ Failed to resolve HuggingFace token from Secrets Manager"
      exit 3
    }
    _append_container_env "HF_TOKEN" "${RESOLVED_HF_TOKEN}"
  elif [ -n "${HF_TOKEN:-}" ]; then
    _append_container_env "HF_TOKEN" "${HF_TOKEN}"
  fi

  if [ -n "${NGC_API_KEY_ARN:-}" ]; then
    echo "🔐 Resolving NGC API key from Secrets Manager..."
    RESOLVED_NGC_KEY=$(aws secretsmanager get-secret-value --secret-id "${NGC_API_KEY_ARN}" --query SecretString --output text --region "${AWS_REGION}") || {
      echo "❌ Failed to resolve NGC API key from Secrets Manager"
      exit 3
    }
    _append_container_env "NGC_API_KEY" "${RESOLVED_NGC_KEY}"
  elif [ -n "${NGC_API_KEY:-}" ]; then
    _append_container_env "NGC_API_KEY" "${NGC_API_KEY}"
  fi
}
@@ -0,0 +1,131 @@
1
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Shared helper: wait/polling functions and config persistence utilities.
5
+ # Sourced by do/deploy — expects AWS_REGION and SCRIPT_DIR to be set by the caller.
6
+
7
# _update_config_var <var_name> <var_value> [config_file]
# Persist a variable to a config file so other scripts can use it.
# If the variable already exists, update it in place; otherwise append.
# Defaults to ${SCRIPT_DIR}/config if no config_file is specified.
_update_config_var() {
  local var_name="$1" var_value="$2" config_file="${3:-${SCRIPT_DIR}/config}"
  if grep -q "^export ${var_name}=" "${config_file}" 2>/dev/null; then
    # Escape characters that are special in the sed replacement text
    # (backslash, '&', and the '|' delimiter) — values such as ARNs or
    # JSON fragments would otherwise corrupt the sed expression.
    local escaped_value
    escaped_value=$(printf '%s' "${var_value}" | sed -e 's/[\\&|]/\\&/g')
    sed -i.bak "s|^export ${var_name}=.*|export ${var_name}=\"${escaped_value}\"|" "${config_file}"
    rm -f "${config_file}.bak"
  else
    echo "" >> "${config_file}"
    echo "export ${var_name}=\"${var_value}\"" >> "${config_file}"
  fi
}
21
+
22
# _get_endpoint_status <endpoint_name>
# Print a SageMaker endpoint's status; prints an empty line when the
# lookup fails (e.g. endpoint not found or credentials expired).
_get_endpoint_status() {
  local ep_name="$1"
  local status
  if status=$(aws sagemaker describe-endpoint \
      --endpoint-name "${ep_name}" \
      --region "${AWS_REGION}" \
      --query EndpointStatus \
      --output text 2>/dev/null); then
    printf '%s\n' "${status}"
  else
    echo ""
  fi
}
31
+
32
# _get_ic_status <inference_component_name>
# Print a SageMaker inference component's status; prints an empty line
# when the lookup fails (e.g. IC not found or credentials expired).
_get_ic_status() {
  local ic_name="$1"
  local status
  if status=$(aws sagemaker describe-inference-component \
      --inference-component-name "${ic_name}" \
      --region "${AWS_REGION}" \
      --query InferenceComponentStatus \
      --output text 2>/dev/null); then
    printf '%s\n' "${status}"
  else
    echo ""
  fi
}
41
+
42
# _find_active_ic_on_endpoint <endpoint_name>
# Find an InService inference component on an endpoint.
# Prints the first match, or an empty line if none exists / the call fails.
_find_active_ic_on_endpoint() {
  local ep_name="$1"
  local ic_name
  if ic_name=$(aws sagemaker list-inference-components \
      --endpoint-name "${ep_name}" \
      --status-equals InService \
      --region "${AWS_REGION}" \
      --query 'InferenceComponents[0].InferenceComponentName' \
      --output text 2>/dev/null); then
    printf '%s\n' "${ic_name}"
  else
    echo ""
  fi
}
53
+
54
# wait_endpoint <endpoint_name>
# Block until a SageMaker endpoint reaches InService status.
# Distinguishes an interrupted wait (endpoint still Creating — typically
# expired CLI credentials) from a real failure; exits 4 in both error cases.
wait_endpoint() {
  local ep_name="$1"

  if aws sagemaker wait endpoint-in-service \
      --endpoint-name "${ep_name}" \
      --region "${AWS_REGION}"; then
    return 0
  fi

  # The waiter gave up — determine whether the endpoint is actually broken
  # or simply still provisioning (e.g. credentials expired mid-wait).
  local current_status
  current_status=$(_get_endpoint_status "${ep_name}" 2>/dev/null)
  if [ "${current_status}" = "Creating" ]; then
    echo ""
    echo "⚠️ Wait interrupted (credentials may have expired), but endpoint is still creating."
    echo " Refresh your credentials and re-run ./do/deploy to resume."
    exit 4
  fi

  echo "❌ Endpoint failed to reach InService status"
  echo " Check CloudWatch Logs for details:"
  echo " https://console.aws.amazon.com/cloudwatch/home?region=${AWS_REGION}#logsV2:log-groups/log-group//aws/sagemaker/Endpoints/${ep_name}"
  exit 4
}
80
+
81
# wait_ic <ic_name> [timeout]
# Poll an inference component until it reaches InService or fails.
# Default timeout is 1800 seconds (30 minutes).
# Reports status every 30 seconds. Detects credential expiry.
# The timeout applies to EVERY in-progress status (previously only Creating
# was bounded, so a component stuck in e.g. Updating would hang forever).
# Exits with code 4 on failure or timeout.
wait_ic() {
  local ic_name="$1"
  local timeout="${2:-1800}"
  local wait_start
  wait_start=$(date +%s)

  while true; do
    local ic_status
    ic_status=$(_get_ic_status "${ic_name}" 2>/dev/null)
    local elapsed=$(( $(date +%s) - wait_start ))

    case "${ic_status}" in
      InService)
        break
        ;;
      Failed)
        echo "❌ Inference component failed to reach InService status"
        echo " Check CloudWatch Logs for details:"
        echo " https://console.aws.amazon.com/cloudwatch/home?region=${AWS_REGION}#logsV2:log-groups/log-group//aws/sagemaker/Endpoints/${ENDPOINT_NAME:-unknown}"
        echo ""
        echo " Debug:"
        echo " aws sagemaker describe-inference-component --inference-component-name ${ic_name} --region ${AWS_REGION}"
        exit 4
        ;;
      "")
        # Empty status usually means the describe call itself failed.
        echo "⚠️ Could not determine inference component status (credentials may have expired)."
        echo " Re-run ./do/deploy to resume."
        exit 4
        ;;
      *)
        # Creating, Updating, or any other in-progress status:
        # enforce the timeout here so no status can loop forever.
        if [ "${elapsed}" -ge "${timeout}" ]; then
          echo ""
          echo "⚠️ Inference component still in status '${ic_status}' after ${timeout}s."
          echo " Re-run ./do/deploy to resume waiting."
          exit 4
        fi
        echo " $(date +%H:%M:%S) Status: ${ic_status} (${elapsed}s elapsed)..."
        sleep 30
        ;;
    esac
  done
}